You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by co...@apache.org on 2020/10/03 00:29:45 UTC

[incubator-tvm-site] branch asf-site updated: Docs build at Sat Oct 3 00:28:58 UTC 2020

This is an automated email from the ASF dual-hosted git repository.

comaniac pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 76810e1  Docs build at Sat Oct  3 00:28:58 UTC 2020
76810e1 is described below

commit 76810e1d5b35a1145ae88baf0a9a209a992e2f57
Author: Cody Yu <co...@gmail.com>
AuthorDate: Sat Oct 3 00:28:59 2020 +0000

    Docs build at Sat Oct  3 00:28:58 UTC 2020
---
 .../tvmc_command_line_driver.py                    | 336 +++++++++++++
 .../tvmc_command_line_driver.ipynb                 | 149 ++++++
 .../sphx_glr_tvmc_command_line_driver_thumb.png    | Bin 0 -> 26786 bytes
 docs/_sources/contribute/release_process.rst.txt   |  13 +-
 .../auto_scheduler/sg_execution_times.rst.txt      |   6 +-
 .../auto_scheduler/tune_conv2d_layer_cuda.rst.txt  | 427 ++++++++++++----
 .../auto_scheduler/tune_matmul_x86.rst.txt         | 134 ++---
 .../tutorials/autotvm/sg_execution_times.rst.txt   |  16 +-
 .../tutorials/autotvm/tune_conv2d_cuda.rst.txt     |  42 +-
 .../tutorials/autotvm/tune_simple_template.rst.txt |  20 +-
 .../tutorials/dev/low_level_custom_pass.rst.txt    |   4 +-
 .../tutorials/dev/sg_execution_times.rst.txt       |   8 +-
 .../frontend/deploy_model_on_android.rst.txt       |   2 +-
 .../deploy_object_detection_pytorch.rst.txt        |   2 +-
 .../tutorials/frontend/deploy_prequantized.rst.txt |   4 +-
 .../frontend/deploy_prequantized_tflite.rst.txt    |   4 +-
 .../tutorials/frontend/deploy_ssd_gluoncv.rst.txt  |   2 +-
 docs/_sources/tutorials/frontend/from_onnx.rst.txt |   2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |  40 +-
 .../get_started/cross_compilation_and_rpc.rst.txt  |   2 +-
 .../get_started/relay_quick_start.rst.txt          |   2 +-
 .../get_started/sg_execution_times.rst.txt         |   9 +-
 .../get_started/tensor_expr_get_started.rst.txt    |   2 +-
 .../get_started/tvmc_command_line_driver.rst.txt   | 371 ++++++++++++++
 docs/_sources/tutorials/index.rst.txt              |  20 +
 docs/_sources/tutorials/language/scan.rst.txt      |  12 +-
 .../tutorials/language/schedule_primitives.rst.txt |   6 +-
 .../tutorials/language/sg_execution_times.rst.txt  |  18 +-
 docs/_sources/tutorials/language/tensorize.rst.txt |  12 +-
 .../tutorials/language/tuple_inputs.rst.txt        |   8 +-
 .../tutorials/micro/sg_execution_times.rst.txt     |   4 +-
 .../tutorials/optimize/opt_conv_cuda.rst.txt       |   2 +-
 .../tutorials/optimize/opt_conv_tensorcore.rst.txt |   2 +-
 docs/_sources/tutorials/optimize/opt_gemm.rst.txt  |  20 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |  10 +-
 docs/_sources/tutorials/topi/intro_topi.rst.txt    |   2 +-
 .../tutorials/topi/sg_execution_times.rst.txt      |   4 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |   4 +-
 .../vta/tutorials/autotvm/tune_relay_vta.rst.txt   |   2 +-
 .../frontend/deploy_classification.rst.txt         |   4 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |   4 +-
 .../vta/tutorials/optimize/convolution_opt.rst.txt |   4 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |   6 +-
 .../vta/tutorials/sg_execution_times.rst.txt       |   6 +-
 .../_sources/vta/tutorials/vta_get_started.rst.txt |   4 +-
 docs/_static/documentation_options.js              |   2 +-
 .../api/doxygen/attr__registry__map_8h_source.html |   2 +-
 docs/api/doxygen/c__runtime__api_8h.html           |   4 +-
 docs/api/doxygen/c__runtime__api_8h_source.html    |   2 +-
 .../doxygen/classtvm_1_1runtime_1_1NDArray.html    |   4 +-
 docs/api/doxygen/codegen_8h_source.html            |   2 +-
 docs/api/doxygen/device__api_8h_source.html        |   2 +-
 docs/api/doxygen/env__func_8h_source.html          |   2 +-
 docs/api/doxygen/generic__func_8h_source.html      |   4 +-
 docs/api/doxygen/ir_2attrs_8h_source.html          |   5 +-
 docs/api/doxygen/ir_2expr_8h_source.html           |   4 +-
 docs/api/doxygen/ir_2op_8h_source.html             |   2 +-
 docs/api/doxygen/namespacemembers_func_r.html      |   5 +-
 docs/api/doxygen/namespacemembers_r.html           |   5 +-
 .../doxygen/namespacetvm_1_1topi_1_1contrib.html   |  61 +++
 docs/api/doxygen/ndarray_8h_source.html            |  50 +-
 docs/api/doxygen/object_8h_source.html             |   6 +-
 docs/api/doxygen/packed__func_8h.html              |   4 +-
 docs/api/doxygen/packed__func_8h_source.html       | 144 +++---
 docs/api/doxygen/reflection_8h_source.html         |   2 +-
 docs/api/doxygen/relay_2type_8h_source.html        |   2 +-
 docs/api/doxygen/rocblas_8h.html                   |   3 +
 docs/api/doxygen/rocblas_8h_source.html            |   3 +-
 docs/api/doxygen/runtime_2module_8h_source.html    |   2 +-
 docs/api/doxygen/search/all_12.js                  |   3 +-
 docs/api/doxygen/search/functions_12.js            |   1 +
 docs/api/doxygen/target__kind_8h_source.html       |   2 +-
 docs/api/doxygen/tir_2expr_8h_source.html          |  10 +-
 .../javadoc/org/apache/tvm/class-use/Function.html |  12 +-
 .../javadoc/org/apache/tvm/class-use/Module.html   |   8 +-
 docs/api/links.html                                |   4 +-
 docs/api/python/auto_scheduler.html                |  10 +-
 docs/api/python/autotvm.html                       |   4 +-
 docs/api/python/contrib.html                       |  26 +-
 docs/api/python/driver.html                        |   4 +-
 docs/api/python/error.html                         |   4 +-
 docs/api/python/graph_runtime.html                 |   4 +-
 docs/api/python/index.html                         |   4 +-
 docs/api/python/ir.html                            |   4 +-
 docs/api/python/micro.html                         |   4 +-
 docs/api/python/ndarray.html                       |   4 +-
 docs/api/python/relay/analysis.html                |   4 +-
 docs/api/python/relay/backend.html                 |   4 +-
 docs/api/python/relay/dataflow_pattern.html        |   4 +-
 docs/api/python/relay/frontend.html                |   4 +-
 docs/api/python/relay/image.html                   |   4 +-
 docs/api/python/relay/index.html                   |   4 +-
 docs/api/python/relay/nn.html                      |   4 +-
 docs/api/python/relay/testing.html                 |   4 +-
 docs/api/python/relay/transform.html               |   4 +-
 docs/api/python/relay/vision.html                  |   4 +-
 docs/api/python/rpc.html                           |   4 +-
 docs/api/python/runtime.html                       |   4 +-
 docs/api/python/target.html                        |   4 +-
 docs/api/python/te.html                            |   4 +-
 docs/api/python/tir.html                           |   4 +-
 docs/api/python/topi.html                          |   4 +-
 docs/api/python/vta/index.html                     |   4 +-
 docs/api/typedoc/assets/js/main.js                 |   2 +-
 docs/api/typedoc/classes/bytestreamreader.html     |  13 +-
 docs/api/typedoc/classes/cachedcallstack.html      |  35 +-
 docs/api/typedoc/classes/dlcontext.html            |  11 +-
 docs/api/typedoc/classes/dldatatype.html           |  13 +-
 docs/api/typedoc/classes/environment.html          |  13 +-
 docs/api/typedoc/classes/ffilibrary.html           |  21 +-
 docs/api/typedoc/classes/graphruntime.html         |  17 +-
 docs/api/typedoc/classes/instance.html             |  41 +-
 docs/api/typedoc/classes/memory.html               |  35 +-
 docs/api/typedoc/classes/module.html               |  11 +-
 docs/api/typedoc/classes/ndarray.html              |  23 +-
 docs/api/typedoc/classes/packedfunccell.html       |   7 +-
 docs/api/typedoc/classes/rpcserver.html            |  15 +-
 docs/api/typedoc/classes/scalar.html               |   7 +-
 docs/api/typedoc/classes/webgpucontext.html        |  13 +-
 docs/api/typedoc/enums/argtypecode.html            |  31 +-
 docs/api/typedoc/enums/aynccallbackcode.html       |   5 +-
 docs/api/typedoc/enums/dldatatypecode.html         |   9 +-
 docs/api/typedoc/enums/rpcserverstate.html         |  13 +-
 docs/api/typedoc/enums/sizeof.html                 |  19 +-
 docs/api/typedoc/index.html                        | 115 ++---
 docs/api/typedoc/interfaces/disposable.html        |   3 +-
 docs/api/typedoc/interfaces/functioninfo.html      |   7 +-
 docs/api/typedoc/interfaces/libraryprovider.html   |   5 +-
 docs/contribute/code_guide.html                    |   4 +-
 docs/contribute/code_review.html                   |   4 +-
 docs/contribute/committer_guide.html               |   4 +-
 docs/contribute/community.html                     |   4 +-
 docs/contribute/document.html                      |   4 +-
 docs/contribute/error_handling.html                |   4 +-
 docs/contribute/git_howto.html                     |   4 +-
 docs/contribute/index.html                         |   4 +-
 docs/contribute/pull_request.html                  |   4 +-
 docs/contribute/release_process.html               |  16 +-
 docs/deploy/android.html                           |   4 +-
 docs/deploy/arm_compute_lib.html                   |   4 +-
 docs/deploy/cpp_deploy.html                        |   4 +-
 docs/deploy/hls.html                               |   4 +-
 docs/deploy/index.html                             |   4 +-
 docs/deploy/integrate.html                         |   4 +-
 docs/dev/benchmark.html                            |   4 +-
 docs/dev/codebase_walkthrough.html                 |   4 +-
 docs/dev/convert_layout.html                       |   4 +-
 docs/dev/debugger.html                             |   4 +-
 docs/dev/frontend/tensorflow.html                  |   4 +-
 docs/dev/how_to.html                               |   4 +-
 docs/dev/hybrid_script.html                        |   4 +-
 docs/dev/index.html                                |   4 +-
 docs/dev/inferbound.html                           |   4 +-
 docs/dev/introduction_to_module_serialization.html |   4 +-
 docs/dev/pass_infra.html                           |   4 +-
 docs/dev/relay_add_op.html                         |   4 +-
 docs/dev/relay_add_pass.html                       |   4 +-
 docs/dev/relay_bring_your_own_codegen.html         |   4 +-
 docs/dev/relay_intro.html                          |   4 +-
 docs/dev/relay_op_strategy.html                    |   4 +-
 docs/dev/runtime.html                              |   4 +-
 docs/dev/security.html                             |   4 +-
 docs/dev/virtual_machine.html                      |   4 +-
 docs/faq.html                                      |   4 +-
 docs/genindex.html                                 |  10 +-
 docs/index.html                                    |   4 +-
 docs/install/docker.html                           |   4 +-
 docs/install/from_source.html                      |   4 +-
 docs/install/index.html                            |   4 +-
 docs/install/nnpack.html                           |   4 +-
 docs/langref/hybrid_script.html                    |   4 +-
 docs/langref/index.html                            |   4 +-
 docs/langref/relay_adt.html                        |   4 +-
 docs/langref/relay_expr.html                       |   4 +-
 docs/langref/relay_op.html                         |   4 +-
 docs/langref/relay_pattern.html                    |   4 +-
 docs/langref/relay_type.html                       |   4 +-
 docs/objects.inv                                   | Bin 17002 -> 17072 bytes
 docs/py-modindex.html                              |   4 +-
 docs/search.html                                   |   4 +-
 docs/searchindex.js                                |   2 +-
 .../auto_scheduler/sg_execution_times.html         |  10 +-
 .../auto_scheduler/tune_conv2d_layer_cuda.html     | 431 +++++++++++-----
 docs/tutorials/auto_scheduler/tune_matmul_x86.html | 138 ++----
 docs/tutorials/autotvm/sg_execution_times.html     |  18 +-
 docs/tutorials/autotvm/tune_conv2d_cuda.html       |  46 +-
 docs/tutorials/autotvm/tune_relay_arm.html         |   4 +-
 docs/tutorials/autotvm/tune_relay_cuda.html        |   4 +-
 docs/tutorials/autotvm/tune_relay_mobile_gpu.html  |   4 +-
 docs/tutorials/autotvm/tune_relay_x86.html         |   4 +-
 docs/tutorials/autotvm/tune_simple_template.html   |  24 +-
 docs/tutorials/dev/bring_your_own_datatypes.html   |   4 +-
 docs/tutorials/dev/low_level_custom_pass.html      |   8 +-
 docs/tutorials/dev/sg_execution_times.html         |  12 +-
 docs/tutorials/dev/use_pass_infra.html             |   4 +-
 docs/tutorials/frontend/build_gcn.html             |   4 +-
 .../frontend/deploy_model_on_android.html          |   6 +-
 docs/tutorials/frontend/deploy_model_on_rasp.html  |   4 +-
 .../frontend/deploy_object_detection_pytorch.html  |   6 +-
 docs/tutorials/frontend/deploy_prequantized.html   |   8 +-
 .../frontend/deploy_prequantized_tflite.html       |   8 +-
 docs/tutorials/frontend/deploy_quantized.html      |   4 +-
 docs/tutorials/frontend/deploy_sparse.html         |   4 +-
 docs/tutorials/frontend/deploy_ssd_gluoncv.html    |   6 +-
 docs/tutorials/frontend/from_caffe2.html           |   4 +-
 docs/tutorials/frontend/from_coreml.html           |   4 +-
 docs/tutorials/frontend/from_darknet.html          |   4 +-
 docs/tutorials/frontend/from_keras.html            |   4 +-
 docs/tutorials/frontend/from_mxnet.html            |   4 +-
 docs/tutorials/frontend/from_onnx.html             |  10 +-
 docs/tutorials/frontend/from_pytorch.html          |   4 +-
 docs/tutorials/frontend/from_tensorflow.html       |   4 +-
 docs/tutorials/frontend/from_tflite.html           |   4 +-
 docs/tutorials/frontend/sg_execution_times.html    |  44 +-
 docs/tutorials/frontend/using_external_lib.html    |   4 +-
 .../get_started/cross_compilation_and_rpc.html     |   7 +-
 docs/tutorials/get_started/relay_quick_start.html  | 111 ++---
 docs/tutorials/get_started/sg_execution_times.html |  13 +-
 .../get_started/tensor_expr_get_started.html       |   7 +-
 .../get_started/tvmc_command_line_driver.html      | 539 +++++++++++++++++++++
 docs/tutorials/index.html                          | 203 ++++----
 docs/tutorials/language/extern_op.html             |   4 +-
 docs/tutorials/language/intrin_math.html           |   4 +-
 docs/tutorials/language/reduction.html             |   4 +-
 docs/tutorials/language/scan.html                  |  16 +-
 docs/tutorials/language/schedule_primitives.html   |  10 +-
 docs/tutorials/language/sg_execution_times.html    |  22 +-
 docs/tutorials/language/tedd.html                  |   4 +-
 docs/tutorials/language/tensorize.html             |  16 +-
 docs/tutorials/language/tuple_inputs.html          |  12 +-
 docs/tutorials/micro/micro_tflite.html             |   4 +-
 docs/tutorials/micro/sg_execution_times.html       |   8 +-
 docs/tutorials/optimize/opt_conv_cuda.html         |   6 +-
 docs/tutorials/optimize/opt_conv_tensorcore.html   |   6 +-
 docs/tutorials/optimize/opt_gemm.html              |  24 +-
 .../optimize/opt_matmul_auto_tensorcore.html       |   4 +-
 docs/tutorials/optimize/sg_execution_times.html    |  14 +-
 docs/tutorials/topi/intro_topi.html                |   6 +-
 docs/tutorials/topi/sg_execution_times.html        |   8 +-
 docs/vta/dev/config.html                           |   4 +-
 docs/vta/dev/hardware.html                         |   4 +-
 docs/vta/dev/index.html                            |   4 +-
 docs/vta/index.html                                |   4 +-
 docs/vta/install.html                              |   4 +-
 docs/vta/tutorials/autotvm/sg_execution_times.html |   8 +-
 docs/vta/tutorials/autotvm/tune_relay_vta.html     | 188 +++----
 .../tutorials/frontend/deploy_classification.html  |  22 +-
 .../vta/tutorials/frontend/sg_execution_times.html |   8 +-
 docs/vta/tutorials/index.html                      |   4 +-
 docs/vta/tutorials/matrix_multiply.html            |   4 +-
 docs/vta/tutorials/optimize/convolution_opt.html   |   8 +-
 .../tutorials/optimize/matrix_multiply_opt.html    |   4 +-
 .../vta/tutorials/optimize/sg_execution_times.html |  10 +-
 docs/vta/tutorials/sg_execution_times.html         |  10 +-
 docs/vta/tutorials/vta_get_started.html            |   8 +-
 255 files changed, 3523 insertions(+), 1631 deletions(-)

diff --git a/docs/_downloads/18fb1ab3ed0a0c9f304520f2beaf4fd6/tvmc_command_line_driver.py b/docs/_downloads/18fb1ab3ed0a0c9f304520f2beaf4fd6/tvmc_command_line_driver.py
new file mode 100644
index 0000000..d844de5
--- /dev/null
+++ b/docs/_downloads/18fb1ab3ed0a0c9f304520f2beaf4fd6/tvmc_command_line_driver.py
@@ -0,0 +1,336 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Getting Started with TVM command line driver - TVMC
+===================================================
+**Authors**:
+`Leandro Nunes <https://github.com/leandron>`_,
+`Matthew Barrett <https://github.com/mbaret>`_
+
+This tutorial is an introduction to working with TVMC, the TVM command
+line driver. TVMC is a tool that exposes TVM features such as
+auto-tuning, compiling, profiling and execution of models, via a
+command line interface.
+
+In this tutorial we are going to use TVMC to compile, run and tune a
+ResNet-50 on an x86 CPU.
+
+We are going to start by downloading ResNet 50 V2. Then, we are going
+to use TVMC to compile this model into a TVM module, and use the
+compiled module to generate predictions. Finally, we are going to experiment
+with the auto-tuning options, that can be used to help the compiler to
+improve network performance.
+
+The final goal is to give an overview of TVMC's capabilities and also
+some guidance on where to look for more information.
+"""
+
+######################################################################
+# Using TVMC
+# ----------
+#
+# TVMC is a Python application, part of the TVM Python package.
+# When you install TVM using a Python package, you will get TVMC as
+# a command line application called ``tvmc``.
+#
+# Alternatively, if you have TVM as a Python module on your
+# ``$PYTHONPATH``, you can access the command line driver functionality
+# via the executable python module, ``python -m tvm.driver.tvmc``.
+#
+# For simplicity, this tutorial will mention TVMC command line using
+# ``tvmc <options>``, but the same results can be obtained with
+# ``python -m tvm.driver.tvmc <options>``.
+#
+# You can check the help page using:
+#
+# .. code-block:: bash
+#
+#   tvmc --help
+#
+#
+# As you can see in the help page, the main features are
+# accessible via the subcommands ``tune``, ``compile`` and ``run``.
+# To read about specific options under a given subcommand, use
+# ``tvmc <subcommand> --help``.
+#
+# In the following sections we will use TVMC to tune, compile and
+# run a model. But first, we need a model.
+#
+
+
+######################################################################
+# Obtaining the model
+# -------------------
+#
+# We are going to use ResNet-50 V2 as an example to experiment with TVMC.
+# The version below is in ONNX format. To download the file, you can use
+# the command below:
+#
+# .. code-block:: bash
+#
+#   wget https://github.com/onnx/models/raw/master/vision/classification/resnet/model/resnet50-v2-7.onnx
+#
+#
+
+######################################################################
+# .. note:: Supported model formats
+#
+#   TVMC supports models created with Keras, ONNX, TensorFlow, TFLite
+#   and Torch. Use the option ``--model-format`` if you need to
+#   explicitly provide the model format you are using. See ``tvmc
+#   compile --help`` for more information.
+#
+
+
+######################################################################
+# Compiling the model
+# -------------------
+#
+# The next step, once we've downloaded ResNet-50, is to compile it.
+# To accomplish that, we are going to use ``tvmc compile``. The
+# output we get from the compilation process is a TAR package,
+# that can be used to run our model on the target device.
+#
+# .. code-block:: bash
+#
+#   tvmc compile \
+#     --target "llvm" \
+#     --output compiled_module.tar \
+#     resnet50-v2-7.onnx
+#
+# Once compilation finishes, the output ``compiled_module.tar`` will be created. This
+# can be directly loaded by your application and run via the TVM runtime APIs.
+#
+
+
+######################################################################
+# .. note:: Defining the correct target
+#
+#   Specifying the correct target (option ``--target``) can have a huge
+#   impact on the performance of the compiled module, as it can take
+#   advantage of hardware features available on the target. For more
+#   information, please refer to `Auto-tuning a convolutional network
+#   for x86 CPU <https://tvm.apache.org/docs/tutorials/autotvm/tune_relay_x86.html#define-network>`_.
+#
+
+
+######################################################################
+#
+# In the next step, we are going to use the compiled module, providing it
+# with some inputs, to generate some predictions.
+#
+
+
+######################################################################
+# Input pre-processing
+# --------------------
+#
+# In order to generate predictions, we will need two things:
+#
+# - the compiled module, which we just produced;
+# - a valid input to the model
+#
+# Each model is particular when it comes to expected tensor shapes, formats and data
+# types. For this reason, most models require some pre and
+# post processing, to ensure the input(s) is valid and to interpret the output(s).
+#
+# In TVMC, we adopted NumPy's ``.npz`` format for both input and output data.
+# This is a well-supported NumPy format to serialize multiple arrays into a file.
+#
+# We will use the usual cat image, similar to other TVM tutorials:
+#
+# .. image:: https://s3.amazonaws.com/model-server/inputs/kitten.jpg
+#    :height: 224px
+#    :width: 224px
+#    :align: center
+#
+# For our ResNet 50 V2 model, the input is expected to be in ImageNet format.
+# Here is an example of a script to pre-process an image for ResNet 50 V2.
+#
+from tvm.contrib.download import download_testdata
+from PIL import Image
+import numpy as np
+
+img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
+img_path = download_testdata(img_url, "imagenet_cat.png", module="data")
+
+# Resize it to 224x224
+resized_image = Image.open(img_path).resize((224, 224))
+img_data = np.asarray(resized_image).astype("float32")
+
+# ONNX expects NCHW input, so convert the array
+img_data = np.transpose(img_data, (2, 0, 1))
+
+# Normalize according to ImageNet
+imagenet_mean = np.array([0.485, 0.456, 0.406])
+imagenet_stddev = np.array([0.229, 0.224, 0.225])
+norm_img_data = np.zeros(img_data.shape).astype("float32")
+for i in range(img_data.shape[0]):
+    norm_img_data[i, :, :] = (img_data[i, :, :] / 255 - imagenet_mean[i]) / imagenet_stddev[i]
+
+# Add batch dimension
+img_data = np.expand_dims(norm_img_data, axis=0)
+
+# Save to .npz (outputs imagenet_cat.npz)
+np.savez("imagenet_cat", data=img_data)
+
+
+######################################################################
+# Running the compiled module
+# ---------------------------
+#
+# With both the compiled module and input file in hand, we can run it by
+# invoking ``tvmc run``.
+#
+# .. code-block:: bash
+#
+#    tvmc run \
+#      --inputs imagenet_cat.npz \
+#      --output predictions.npz \
+#      compiled_module.tar
+#
+# When running the above command, a new file ``predictions.npz`` should
+# be produced. It contains the output tensors.
+#
+# In this example, we are running the model on the same machine that we used
+# for compilation. In some cases we might want to run it remotely via
+# an RPC Tracker. To read more about these options please check ``tvmc
+# run --help``.
+#
+
+######################################################################
+# Output post-processing
+# ----------------------
+#
+# As previously mentioned, each model will have its own particular way
+# of providing output tensors.
+#
+# In our case, we need to run some post-processing to render the
+# outputs from ResNet 50 V2 into a more human-readable form.
+#
+# The script below shows an example of the post-processing to extract
+# labels from the output of our compiled module.
+#
+import os.path
+import numpy as np
+
+from scipy.special import softmax
+
+from tvm.contrib.download import download_testdata
+
+# Download a list of labels
+labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
+labels_path = download_testdata(labels_url, "synset.txt", module="data")
+
+with open(labels_path, "r") as f:
+    labels = [l.rstrip() for l in f]
+
+output_file = "predictions.npz"
+
+# Open the output and read the output tensor
+if os.path.exists(output_file):
+    with np.load(output_file) as data:
+        scores = softmax(data["output_0"])
+        scores = np.squeeze(scores)
+        scores = np.argsort(scores)[::-1]
+
+        for i in scores[0:5]:
+            print("class='%s' with probability=%f" % (labels[i], scores[i]))
+
+
+########################################################################
+# When running the script, a list of predictions should be printed similar
+# to the example below.
+#
+# .. code-block:: bash
+#
+#   $ python post_processing.py
+#   class=n02123045 tabby, tabby cat ; probability=446.000000
+#   class=n02123159 tiger cat ; probability=675.000000
+#   class=n02124075 Egyptian cat ; probability=836.000000
+#   class=n02129604 tiger, Panthera tigris ; probability=917.000000
+#   class=n04040759 radiator ; probability=213.000000
+#
+
+
+######################################################################
+# Tuning the model
+# ----------------
+#
+# In some cases, we might not get the expected performance when running
+# inferences using our compiled module. In cases like this, we can make use
+# of the auto-tuner, to find a better configuration for our model and
+# get a boost in performance.
+#
+# Tuning in TVM refers to the process by which a model is optimized
+# to run faster on a given target. This differs from training or
+# fine-tuning in that it does not affect the accuracy of the model,
+# but only the runtime performance.
+#
+# As part of the tuning process, TVM will try running many different
+# operator implementation variants to see which perform best. The
+# results of these runs are stored in a tuning records file, which is
+# ultimately the output of the ``tune`` subcommand.
+#
+# In the simplest form, tuning requires you to provide three things:
+#
+# - the target specification of the device you intend to run this model on;
+# - the path to an output file in which the tuning records will be stored, and finally,
+# - a path to the model to be tuned.
+#
+#
+# The example below demonstrates how that works in practice:
+#
+# .. code-block:: bash
+#
+#   tvmc tune \
+#     --target "llvm" \
+#     --output autotuner_records.json \
+#     resnet50-v2-7.onnx
+#
+#
+# Tuning sessions can take a long time, so ``tvmc tune`` offers many options to
+# customize your tuning process, in terms of number of repetitions (``--repeat`` and
+# ``--number``, for example), the tuning algorithm to be used, and so on.
+# Check ``tvmc tune --help`` for more information.
+#
+# As an output of the tuning process above, we obtained the tuning records stored
+# in ``autotuner_records.json``. This file can be used in two ways:
+#
+# - as an input to further tuning (via ``tvmc tune --tuning-records``), or
+# - as an input to the compiler
+#
+# The compiler will use the results to generate high performance code for the model
+# on your specified target. To do that we can use ``tvmc compile --tuning-records``.
+# Check ``tvmc compile --help`` for more information.
+#
+
+
+######################################################################
+# Final Remarks
+# -------------
+#
+# In this tutorial, we presented TVMC, a command line driver for TVM.
+# We demonstrated how to compile, run and tune a model, as well
+# as discussed the need for pre and post processing of inputs and outputs.
+#
+# Here we presented a simple example using ResNet 50 V2 locally. However, TVMC
+# supports many more features including cross-compilation, remote execution and
+# profiling/benchmarking.
+#
+# To see what other options are available, please have a look at ``tvmc --help``.
+#
diff --git a/docs/_downloads/dfa0880631b34bb8814952afdc9031d8/tvmc_command_line_driver.ipynb b/docs/_downloads/dfa0880631b34bb8814952afdc9031d8/tvmc_command_line_driver.ipynb
new file mode 100644
index 0000000..89c0d65
--- /dev/null
+++ b/docs/_downloads/dfa0880631b34bb8814952afdc9031d8/tvmc_command_line_driver.ipynb
@@ -0,0 +1,149 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\nGetting Started with TVM command line driver - TVMC\n===================================================\n**Authors**:\n`Leandro Nunes <https://github.com/leandron>`_,\n`Matthew Barrett <https://github.com/mbaret>`_\n\nThis tutorial is an introduction to working with TVMC, the TVM command\nline driver. TVMC is a tool that exposes TVM features such as\nauto-tuning, compiling, profiling and execution of models, via a\ncommand line interface.\n\nIn this tutorial we are going to u [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Using TVMC\n----------\n\nTVMC is a Python application, part of the TVM Python package.\nWhen you install TVM using a Python package, you will get TVMC as\nas a command line application called ``tvmc``.\n\nAlternatively, if you have TVM as a Python module on your\n``$PYTHONPATH``,you can access the command line driver functionality\nvia the executable python module, ``python -m tvm.driver.tvmc``.\n\nFor simplicity, this tutorial will mention TVMC command line using\n``tvmc <opti [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Obtaining the model\n-------------------\n\nWe are going to use ResNet-50 V2 as an example to experiment with TVMC.\nThe version below is in ONNX format. To download the file, you can use\nthe command below:\n\n.. code-block:: bash\n\n  wget https://github.com/onnx/models/raw/master/vision/classification/resnet/model/resnet50-v2-7.onnx\n\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>Supported model formats\n\n  TVMC supports models created with Keras, ONNX, TensorFlow, TFLite\n  and Torch. Use the option ``--model-format`` if you need to\n  explicitly provide the model format you are using. See ``tvmc\n  compile --help`` for more information.</p></div>\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Compiling the model\n-------------------\n\nThe next step once we've downloaded ResNet-50, is to compile it,\nTo accomplish that, we are going to use ``tvmc compile``. The\noutput we get from the compilation process is a TAR package,\nthat can be used to run our model on the target device.\n\n.. code-block:: bash\n\n  tvmc compile \\\n    --target \"llvm\" \\\n    --output compiled_module.tar \\\n    resnet50-v2-7.onnx\n\nOnce compilation finishes, the output ``compiled_module.t [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "<div class=\"alert alert-info\"><h4>Note</h4><p>Defining the correct target\n\n  Specifying the correct target (option ``--target``) can have a huge\n  impact on the performance of the compiled module, as it can take\n  advantage of hardware features available on the target. For more\n  information, please refer to `Auto-tuning a convolutional network\n  for x86 CPU <https://tvm.apache.org/docs/tutorials/autotvm/tune_relay_x86.html#define-network>`_.</p></div>\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "In the next step, we are going to use the compiled module, providing it\nwith some inputs, to generate some predictions.\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Input pre-processing\n--------------------\n\nIn order to generate predictions, we will need two things:\n\n- the compiled module, which we just produced;\n- a valid input to the model\n\nEach model is particular when it comes to expected tensor shapes, formats and data\ntypes. For this reason, most models require some pre and\npost processing, to ensure the input(s) is valid and to interpret the output(s).\n\nIn TVMC, we adopted NumPy's ``.npz`` format for both input and output [...]
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "from tvm.contrib.download import download_testdata\nfrom PIL import Image\nimport numpy as np\n\nimg_url = \"https://s3.amazonaws.com/model-server/inputs/kitten.jpg\"\nimg_path = download_testdata(img_url, \"imagenet_cat.png\", module=\"data\")\n\n# Resize it to 224x224\nresized_image = Image.open(img_path).resize((224, 224))\nimg_data = np.asarray(resized_image).astype(\"float32\")\n\n# ONNX expects NCHW input, so convert the array\nimg_data = np.transpose(img_data, (2, 0, 1))\ [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Running the compiled module\n---------------------------\n\nWith both the compiled module and input file in hand, we can run it by\ninvoking ``tvmc run``.\n\n.. code-block:: bash\n\n   tvmc run \\\n     --inputs imagenet_cat.npz \\\n     --output predictions.npz \\\n     compiled_module.tar\n\nWhen running the above command, a new file ``predictions.npz`` should\nbe produced. It contains the output tensors.\n\nIn this example, we are running the model on the same machine that we [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Output post-processing\n----------------------\n\nAs previously mentioned, each model will have its own particular way\nof providing output tensors.\n\nIn our case, we need to run some post-processing to render the\noutputs from ResNet 50 V2 into a more human-readable form.\n\nThe script below shows an example of the post-processing to extract\nlabels from the output of our compiled module.\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import os.path\nimport numpy as np\n\nfrom scipy.special import softmax\n\nfrom tvm.contrib.download import download_testdata\n\n# Download a list of labels\nlabels_url = \"https://s3.amazonaws.com/onnx-model-zoo/synset.txt\"\nlabels_path = download_testdata(labels_url, \"synset.txt\", module=\"data\")\n\nwith open(labels_path, \"r\") as f:\n    labels = [l.rstrip() for l in f]\n\noutput_file = \"predictions.npz\"\n\n# Open the output and read the output tensor\nif os.path.exist [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "When running the script, a list of predictions should be printed similar\nto the example below.\n\n.. code-block:: bash\n\n  $ python post_processing.py\n  class=n02123045 tabby, tabby cat ; probability=446.000000\n  class=n02123159 tiger cat ; probability=675.000000\n  class=n02124075 Egyptian cat ; probability=836.000000\n  class=n02129604 tiger, Panthera tigris ; probability=917.000000\n  class=n04040759 radiator ; probability=213.000000\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Tuning the model\n----------------\n\nIn some cases, we might not get the expected performance when running\ninferences using our compiled module. In cases like this, we can make use\nof the auto-tuner, to find a better configuration for our model and\nget a boost in performance.\n\nTuning in TVM refers to the process by which a model is optimized\nto run faster on a given target. This differs from training or\nfine-tuning in that it does not affect the accuracy of the model,\nb [...]
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Final Remarks\n-------------\n\nIn this tutorial, we presented TVMC, a command line driver for TVM.\nWe demonstrated how to compile, run and tune a model, as well\nas discussed the need for pre and post processing of inputs and outputs.\n\nHere we presented a simple example using ResNet 50 V2 locally. However, TVMC\nsupports many more features including cross-compilation, remote execution and\nprofiling/benchmarking.\n\nTo see what other options are available, please have a look [...]
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.10"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/docs/_images/sphx_glr_tvmc_command_line_driver_thumb.png b/docs/_images/sphx_glr_tvmc_command_line_driver_thumb.png
new file mode 100644
index 0000000..233f8e6
Binary files /dev/null and b/docs/_images/sphx_glr_tvmc_command_line_driver_thumb.png differ
diff --git a/docs/_sources/contribute/release_process.rst.txt b/docs/_sources/contribute/release_process.rst.txt
index 3e2239f..705b55c 100644
--- a/docs/_sources/contribute/release_process.rst.txt
+++ b/docs/_sources/contribute/release_process.rst.txt
@@ -59,7 +59,18 @@ After generating the gpg key, you need to upload your key to a public key server
 
 If you want to do the release on another machine, you can transfer your gpg key to that machine via the :code:`gpg --export` and :code:`gpg --import` commands.
 
-The last step is to update the KEYS file with your code signing key https://www.apache.org/dev/openpgp.html#export-public-key. Check in the changes to the master branch.
+The last step is to update the KEYS file with your code signing key https://www.apache.org/dev/openpgp.html#export-public-key. Check in the changes to the TVM master branch, as well as to ASF SVN:
+
+.. code-block:: bash
+
+	# --depth=files avoids checking out existing folders
+	svn co --depth=files "https://dist.apache.org/repos/dist/dev/incubator/tvm" svn-tvm
+	cd svn-tvm
+	# edit KEYS file
+	svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m "Update KEYS"
+	# update downloads.apache.org
+	svn rm --username $ASF_USERNAME --password "$ASF_PASSWORD" https://dist.apache.org/repos/dist/release/incubator/tvm/KEYS -m "Update KEYS"
+	svn cp --username $ASF_USERNAME --password "$ASF_PASSWORD" https://dist.apache.org/repos/dist/dev/incubator/tvm/KEYS https://dist.apache.org/repos/dist/release/incubator/tvm/ -m "Update KEYS"
 
 
 Cut a Release Candidate
diff --git a/docs/_sources/tutorials/auto_scheduler/sg_execution_times.rst.txt b/docs/_sources/tutorials/auto_scheduler/sg_execution_times.rst.txt
index 385f5e4..c4fcc11 100644
--- a/docs/_sources/tutorials/auto_scheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/auto_scheduler/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**04:42.879** total execution time for **tutorials_auto_scheduler** files:
+**03:32.923** total execution time for **tutorials_auto_scheduler** files:
 
-- **02:56.708**: :ref:`sphx_glr_tutorials_auto_scheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``)
-- **01:46.171**: :ref:`sphx_glr_tutorials_auto_scheduler_tune_matmul_x86.py` (``tune_matmul_x86.py``)
+- **01:57.640**: :ref:`sphx_glr_tutorials_auto_scheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``)
+- **01:35.283**: :ref:`sphx_glr_tutorials_auto_scheduler_tune_matmul_x86.py` (``tune_matmul_x86.py``)
diff --git a/docs/_sources/tutorials/auto_scheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/tutorials/auto_scheduler/tune_conv2d_layer_cuda.rst.txt
index cc58696..312ab8b 100644
--- a/docs/_sources/tutorials/auto_scheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/tutorials/auto_scheduler/tune_conv2d_layer_cuda.rst.txt
@@ -199,63 +199,278 @@ cooperative fetching, unrolling and operator fusion.
                  kernel: Buffer(kernel_2: Pointer(float32), float32, [512, 512, 3, 3], []),
                  data: Buffer(data_2: Pointer(float32), float32, [1, 512, 7, 7], [])}
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute} {
-      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 28;
+      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 64;
       attr [compute_3: Pointer(float32)] "storage_scope" = "local";
-      allocate(compute_3, float32, [16]);
+      allocate(compute_3, float32, [8]);
       attr [pad_temp.shared: Pointer(float32)] "storage_scope" = "shared";
-      allocate(pad_temp.shared, float32, [144]);
+      allocate(pad_temp.shared, float32, [1296]);
       attr [kernel.shared: Pointer(float32)] "storage_scope" = "shared";
-      allocate(kernel.shared, float32, [6144]);
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
+      allocate(kernel.shared, float32, [1152]);
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
         compute_3[0] = 0f32
-        compute_3[4] = 0f32
-        compute_3[8] = 0f32
-        compute_3[12] = 0f32
         compute_3[1] = 0f32
-        compute_3[5] = 0f32
-        compute_3[9] = 0f32
-        compute_3[13] = 0f32
         compute_3[2] = 0f32
-        compute_3[6] = 0f32
-        compute_3[10] = 0f32
-        compute_3[14] = 0f32
         compute_3[3] = 0f32
+        compute_3[4] = 0f32
+        compute_3[5] = 0f32
+        compute_3[6] = 0f32
         compute_3[7] = 0f32
-        compute_3[11] = 0f32
-        compute_3[15] = 0f32
         for (rc.outer.outer: int32, 0, 32) {
-          for (ry.outer.outer: int32, 0, 3) {
-            attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-            pad_temp.shared[threadIdx.x_1] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), (float32*)data_2[((((((rc.outer.outer*784) + (floordiv(threadIdx.x_1, 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x_1, 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-            pad_temp.shared[(threadIdx.x_1 + 56)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), (float32*)data_2[((((((rc.outer.outer*784) + (floordiv((threadIdx.x_1 + 56), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-            if @tir.likely((threadIdx.x_1 < 32), dtype=bool) {
-              pad_temp.shared[(threadIdx.x_1 + 112)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), (float32*)data_2[((((((rc.outer.outer*784) + (floordiv((threadIdx.x_1 + 112), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
+            pad_temp.shared[(threadIdx.x_1*4)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1*4), 81)) && (floormod((threadIdx.x_1*4), 81) < 72)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv((threadIdx.x_1*4), 81)*49)) + (floordiv(floormod((threadIdx.x_1*4), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 1), 81)) && (floormod(((threadIdx.x_1*4) + 1), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 1), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 2), 81)) && (floormod(((threadIdx.x_1*4) + 2), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 2), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 2), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 3), 81)) && (floormod(((threadIdx.x_1*4) + 3), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 9))) && (floormod(((threadIdx.x_1*4) + 3), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 3), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 3), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0f32, dtype=float32)
+          }
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
+            pad_temp.shared[((threadIdx.x_1*4) + 196)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 34), 81)) && (floormod(((threadIdx.x_1*4) + 34), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 7), 9))) && (floormod(((threadIdx.x_1*4) + 7), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 196), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 34), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 7), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 197)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 35), 81)) && (floormod(((threadIdx.x_1*4) + 35), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 8), 9))) && (floormod(((threadIdx.x_1*4) + 8), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 197), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 35), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 198)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 36), 81)) && (floormod(((threadIdx.x_1*4) + 36), 81) < 72)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 198), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 36), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 199)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 37), 81)) && (floormod(((threadIdx.x_1*4) + 37), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 199), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 37), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
+          }
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
+            pad_temp.shared[((threadIdx.x_1*4) + 392)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 68), 81)) && (floormod(((threadIdx.x_1*4) + 68), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 5), 9))) && (floormod(((threadIdx.x_1*4) + 5), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 392), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 68), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 5), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 393)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 69), 81)) && (floormod(((threadIdx.x_1*4) + 69), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 6), 9))) && (floormod(((threadIdx.x_1*4) + 6), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 393), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 69), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 6), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 394)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 70), 81)) && (floormod(((threadIdx.x_1*4) + 70), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 7), 9))) && (floormod(((threadIdx.x_1*4) + 7), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 394), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 70), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 7), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 395)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 71), 81)) && (floormod(((threadIdx.x_1*4) + 71), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 8), 9))) && (floormod(((threadIdx.x_1*4) + 8), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 395), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 71), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - 8)], 0f32, dtype=float32)
+          }
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
+            pad_temp.shared[((threadIdx.x_1*4) + 588)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 21), 81)) && (floormod(((threadIdx.x_1*4) + 21), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 9))) && (floormod(((threadIdx.x_1*4) + 3), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 588), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 21), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 589)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 22), 81)) && (floormod(((threadIdx.x_1*4) + 22), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 4), 9))) && (floormod(((threadIdx.x_1*4) + 4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 589), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 22), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 4), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 590)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 23), 81)) && (floormod(((threadIdx.x_1*4) + 23), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 5), 9))) && (floormod(((threadIdx.x_1*4) + 5), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 590), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 23), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 5), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 591)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 24), 81)) && (floormod(((threadIdx.x_1*4) + 24), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 6), 9))) && (floormod(((threadIdx.x_1*4) + 6), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 591), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 24), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 6), 9)) - 8)], 0f32, dtype=float32)
+          }
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
+            pad_temp.shared[((threadIdx.x_1*4) + 784)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 55), 81)) && (floormod(((threadIdx.x_1*4) + 55), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 784), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 55), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 785)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 56), 81)) && (floormod(((threadIdx.x_1*4) + 56), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 785), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 56), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 786)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 57), 81)) && (floormod(((threadIdx.x_1*4) + 57), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 9))) && (floormod(((threadIdx.x_1*4) + 3), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 786), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 57), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 787)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 58), 81)) && (floormod(((threadIdx.x_1*4) + 58), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 4), 9))) && (floormod(((threadIdx.x_1*4) + 4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 787), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 58), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 4), 9)) - 8)], 0f32, dtype=float32)
+          }
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
+            pad_temp.shared[((threadIdx.x_1*4) + 980)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 8), 81)) && (floormod(((threadIdx.x_1*4) + 8), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 8), 9))) && (floormod(((threadIdx.x_1*4) + 8), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 980), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 8), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 981)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 9), 81)) && (floormod(((threadIdx.x_1*4) + 9), 81) < 72)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 981), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 9), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 982)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 10), 81)) && (floormod(((threadIdx.x_1*4) + 10), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 982), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 10), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
+            pad_temp.shared[((threadIdx.x_1*4) + 983)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 11), 81)) && (floormod(((threadIdx.x_1*4) + 11), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 983), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 11), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
+          }
+          attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49 {
+            if @tir.likely((threadIdx.x_1 < 30), dtype=bool) {
+              pad_temp.shared[((threadIdx.x_1*4) + 1176)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 42), 81)) && (floormod(((threadIdx.x_1*4) + 42), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 6), 9))) && (floormod(((threadIdx.x_1*4) + 6), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1176), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 42), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 6), 9)) - 8)], 0f32, dtype=float32)
             }
-            for (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer: int32, 0, 110) {
-              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
-              if @tir.likely((((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2) < 6144), dtype=bool) {
-                kernel.shared[((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2)] = (float32*)kernel_2[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2), 48)*4608)) + (rc.outer.outer*144)) + (floordiv(floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2), 48), 3)*9)) + (ry.outer.outer*3)) + floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2), 3))]
+            if @tir.likely(((threadIdx.x_1*4) < 119), dtype=bool) {
+              if @tir.likely((threadIdx.x_1 < 30), dtype=bool) {
+                pad_temp.shared[((threadIdx.x_1*4) + 1177)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 43), 81)) && (floormod(((threadIdx.x_1*4) + 43), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 7), 9))) && (floormod(((threadIdx.x_1*4) + 7), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1177), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 43), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 7), 9)) - 8)], 0f32, dtype=float32)
               }
             }
-            for (rx.outer.inner: int32, 0, 3) {
-              for (ff.outer.inner: int32, 0, 4) {
-                for (rc.inner: int32, 0, 16) {
-                  compute_3[ff.outer.inner] = ((float32*)compute_3[ff.outer.inner] + ((float32*)pad_temp.shared[(((rc.inner*9) + rx.outer.inner) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((((floordiv(threadIdx.x, 7)*192) + (ff.outer.inner*48)) + (rc.inner*3)) + rx.outer.inner)]))
-                  compute_3[(ff.outer.inner + 4)] = ((float32*)compute_3[(ff.outer.inner + 4)] + ((float32*)pad_temp.shared[(((rc.inner*9) + rx.outer.inner) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[(((((floordiv(threadIdx.x, 7)*192) + (ff.outer.inner*48)) + (rc.inner*3)) + rx.outer.inner) + 1536)]))
-                  compute_3[(ff.outer.inner + 8)] = ((float32*)compute_3[(ff.outer.inner + 8)] + ((float32*)pad_temp.shared[(((rc.inner*9) + rx.outer.inner) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[(((((floordiv(threadIdx.x, 7)*192) + (ff.outer.inner*48)) + (rc.inner*3)) + rx.outer.inner) + 3072)]))
-                  compute_3[(ff.outer.inner + 12)] = ((float32*)compute_3[(ff.outer.inner + 12)] + ((float32*)pad_temp.shared[(((rc.inner*9) + rx.outer.inner) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[(((((floordiv(threadIdx.x, 7)*192) + (ff.outer.inner*48)) + (rc.inner*3)) + rx.outer.inner) + 4608)]))
-                }
+            if @tir.likely(((threadIdx.x_1*4) < 118), dtype=bool) {
+              if @tir.likely((threadIdx.x_1 < 30), dtype=bool) {
+                pad_temp.shared[((threadIdx.x_1*4) + 1178)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 44), 81)) && (floormod(((threadIdx.x_1*4) + 44), 81) < 72)) && (1 <= floormod(((threadIdx.x_1*4) + 8), 9))) && (floormod(((threadIdx.x_1*4) + 8), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1178), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 44), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - 8)], 0f32, dtype=float32)
               }
             }
+            if @tir.likely(((threadIdx.x_1*4) < 117), dtype=bool) {
+              if @tir.likely((threadIdx.x_1 < 30), dtype=bool) {
+                pad_temp.shared[((threadIdx.x_1*4) + 1179)] = @tir.if_then_else(((((9 <= floormod(((threadIdx.x_1*4) + 45), 81)) && (floormod(((threadIdx.x_1*4) + 45), 81) < 72)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1179), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 45), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
+              }
+            }
+          }
+          attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[threadIdx.x_2] = (float32*)kernel_2[(((blockIdx.x*36864) + (rc.outer.outer*144)) + threadIdx.x_2)]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 49)] = (float32*)kernel_2[(((blockIdx.x*36864) + (rc.outer.outer*144)) + (threadIdx.x_2 + 49))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 98)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 98), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 98), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 147)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 147), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 3), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 196)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 196), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 52), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 245)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 245), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 101), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 294)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 294), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 6), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 343)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 343), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 55), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 392)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 392), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 104), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 441)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 441), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 9), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 490)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 490), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 58), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 539)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 539), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 107), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 588)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 588), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 12), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 637)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 637), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 61), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 686)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 686), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 110), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 735)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 735), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 15), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 784)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 784), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 64), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 833)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 833), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 113), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 882)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 882), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 18), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 931)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 931), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 67), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 980)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 980), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 116), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 1029)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1029), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 21), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          kernel.shared[(threadIdx.x_2 + 1078)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1078), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 70), 144))]
+          attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 49;
+          if @tir.likely((threadIdx.x_2 < 25), dtype=bool) {
+            kernel.shared[(threadIdx.x_2 + 1127)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1127), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 119), 144))]
+          }
+          for (rc.outer.inner: int32, 0, 8) {
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[(rc.outer.inner*18)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 144)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 288)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 432)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 145)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 289)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 433)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 2)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 146)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 290)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 434)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 3)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 147)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 291)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 435)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 4)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 148)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 292)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 436)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 5)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 149)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 293)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 437)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 6)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 150)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 294)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 438)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 7)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 151)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 295)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 439)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 8)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 152)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 296)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 440)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 9)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 153)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 297)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 441)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 10)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 154)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 298)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 442)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 11)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 155)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 299)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 443)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 12)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 156)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 300)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 444)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 13)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 157)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 301)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 445)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 14)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 158)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 302)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 446)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 15)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 159)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 303)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 447)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 16)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 160)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 304)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 448)]))
+            compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 17)]))
+            compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 161)]))
+            compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 305)]))
+            compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 449)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 576)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 720)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 864)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 1008)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 577)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 721)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 865)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1009)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 578)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 722)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 866)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1010)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 579)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 723)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 867)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1011)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 580)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 724)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 868)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1012)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 581)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 725)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 869)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1013)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 582)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 726)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 870)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1014)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 583)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 727)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 871)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1015)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 584)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 728)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 872)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1016)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 585)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 729)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 873)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1017)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 586)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 730)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 874)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1018)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 587)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 731)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 875)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1019)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 588)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 732)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 876)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1020)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 589)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 733)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 877)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1021)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 590)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 734)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 878)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1022)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 591)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 735)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 879)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1023)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 592)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 736)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 880)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1024)]))
+            compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 593)]))
+            compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 737)]))
+            compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 881)]))
+            compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1025)]))
           }
         }
-        for (i1.inner: int32, 0, 4) {
-          compute_2[(((((floordiv(blockIdx.x, 7)*6272) + (floordiv(threadIdx.x, 7)*196)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7))] = max(((float32*)compute_3[i1.inner] + (float32*)bias_2[(((floordiv(blockIdx.x, 7)*128) + (floordiv(threadIdx.x, 7)*4)) + i1.inner)]), 0f32)
-          compute_2[((((((floordiv(blockIdx.x, 7)*6272) + (floordiv(threadIdx.x, 7)*196)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 1568)] = max(((float32*)compute_3[(i1.inner + 4)] + (float32*)bias_2[((((floordiv(blockIdx.x, 7)*128) + (floordiv(threadIdx.x, 7)*4)) + i1.inner) + 32)]), 0f32)
-          compute_2[((((((floordiv(blockIdx.x, 7)*6272) + (floordiv(threadIdx.x, 7)*196)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 3136)] = max(((float32*)compute_3[(i1.inner + 8)] + (float32*)bias_2[((((floordiv(blockIdx.x, 7)*128) + (floordiv(threadIdx.x, 7)*4)) + i1.inner) + 64)]), 0f32)
-          compute_2[((((((floordiv(blockIdx.x, 7)*6272) + (floordiv(threadIdx.x, 7)*196)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 4704)] = max(((float32*)compute_3[(i1.inner + 12)] + (float32*)bias_2[((((floordiv(blockIdx.x, 7)*128) + (floordiv(threadIdx.x, 7)*4)) + i1.inner) + 96)]), 0f32)
+        for (i1.inner: int32, 0, 8) {
+          compute_2[(((blockIdx.x*392) + (i1.inner*49)) + threadIdx.x)] = max(((float32*)compute_3[i1.inner] + (float32*)bias_2[((blockIdx.x*8) + i1.inner)]), 0f32)
         }
       }
     }
@@ -308,7 +523,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 1.361 ms
+    Execution time of this operator: 0.153 ms
 
 
 
@@ -349,73 +564,73 @@ print the equivalent python schedule API, and build the binary again.
  .. code-block:: none
 
     Equivalent python schedule:
-    i0, i1, i2, i3 = tuple(pad_temp.op.axis) + tuple(pad_temp.op.reduce_axis)
-    nn, ff, yy, xx, rc, ry, rx = tuple(compute.op.axis) + tuple(compute.op.reduce_axis)
-    ax0, ax1, ax2, ax3 = tuple(T_add.op.axis) + tuple(T_add.op.reduce_axis)
-    i0, i1, i2, i3 = tuple(compute.op.axis) + tuple(compute.op.reduce_axis)
+    pad_temp_i0, pad_temp_i1, pad_temp_i2, pad_temp_i3 = tuple(pad_temp.op.axis) + tuple(pad_temp.op.reduce_axis)
+    compute_nn, compute_ff, compute_yy, compute_xx, compute_rc, compute_ry, compute_rx = tuple(compute.op.axis) + tuple(compute.op.reduce_axis)
+    T_add_ax0, T_add_ax1, T_add_ax2, T_add_ax3 = tuple(T_add.op.axis) + tuple(T_add.op.reduce_axis)
+    compute_i0, compute_i1, compute_i2, compute_i3 = tuple(compute.op.axis) + tuple(compute.op.reduce_axis)
     s[T_add].compute_inline()
-    nn_o_i, nn_i = s[compute].split(nn, factor=1)
-    nn_o_o_i, nn_o_i = s[compute].split(nn_o_i, factor=1)
-    nn_o_o_o_i, nn_o_o_i = s[compute].split(nn_o_o_i, factor=1)
-    nn_o_o_o_o, nn_o_o_o_i = s[compute].split(nn_o_o_o_i, factor=1)
-    ff_o_i, ff_i = s[compute].split(ff, factor=1)
-    ff_o_o_i, ff_o_i = s[compute].split(ff_o_i, factor=1)
-    ff_o_o_o_i, ff_o_o_i = s[compute].split(ff_o_o_i, factor=16)
-    ff_o_o_o_o, ff_o_o_o_i = s[compute].split(ff_o_o_o_i, factor=1)
-    yy_o_i, yy_i = s[compute].split(yy, factor=1)
-    yy_o_o_i, yy_o_i = s[compute].split(yy_o_i, factor=7)
-    yy_o_o_o_i, yy_o_o_i = s[compute].split(yy_o_o_i, factor=1)
-    yy_o_o_o_o, yy_o_o_o_i = s[compute].split(yy_o_o_o_i, factor=1)
-    xx_o_i, xx_i = s[compute].split(xx, factor=1)
-    xx_o_o_i, xx_o_i = s[compute].split(xx_o_i, factor=1)
-    xx_o_o_o_i, xx_o_o_i = s[compute].split(xx_o_o_i, factor=7)
-    xx_o_o_o_o, xx_o_o_o_i = s[compute].split(xx_o_o_o_i, factor=1)
-    rc_o_i, rc_i = s[compute].split(rc, factor=1)
-    rc_o_o, rc_o_i = s[compute].split(rc_o_i, factor=16)
-    ry_o_i, ry_i = s[compute].split(ry, factor=3)
-    ry_o_o, ry_o_i = s[compute].split(ry_o_i, factor=1)
-    rx_o_i, rx_i = s[compute].split(rx, factor=1)
-    rx_o_o, rx_o_i = s[compute].split(rx_o_i, factor=3)
-    s[compute].reorder(nn_o_o_o_o, ff_o_o_o_o, yy_o_o_o_o, xx_o_o_o_o, nn_o_o_o_i, ff_o_o_o_i, yy_o_o_o_i, xx_o_o_o_i, nn_o_o_i, ff_o_o_i, yy_o_o_i, xx_o_o_i, rc_o_o, ry_o_o, rx_o_o, rc_o_i, ry_o_i, rx_o_i, nn_o_i, ff_o_i, yy_o_i, xx_o_i, rc_i, ry_i, rx_i, nn_i, ff_i, yy_i, xx_i)
-    i0_o_i, i0_i = s[compute].split(i0, factor=1)
-    i0_o_o_i, i0_o_i = s[compute].split(i0_o_i, factor=1)
-    i0_o_o_o, i0_o_o_i = s[compute].split(i0_o_o_i, factor=1)
-    i1_o_i, i1_i = s[compute].split(i1, factor=1)
-    i1_o_o_i, i1_o_i = s[compute].split(i1_o_i, factor=16)
-    i1_o_o_o, i1_o_o_i = s[compute].split(i1_o_o_i, factor=1)
-    i2_o_i, i2_i = s[compute].split(i2, factor=7)
-    i2_o_o_i, i2_o_i = s[compute].split(i2_o_i, factor=1)
-    i2_o_o_o, i2_o_o_i = s[compute].split(i2_o_o_i, factor=1)
-    i3_o_i, i3_i = s[compute].split(i3, factor=1)
-    i3_o_o_i, i3_o_i = s[compute].split(i3_o_i, factor=7)
-    i3_o_o_o, i3_o_o_i = s[compute].split(i3_o_o_i, factor=1)
-    s[compute].reorder(i0_o_o_o, i1_o_o_o, i2_o_o_o, i3_o_o_o, i0_o_o_i, i1_o_o_i, i2_o_o_i, i3_o_o_i, i0_o_i, i1_o_i, i2_o_i, i3_o_i, i0_i, i1_i, i2_i, i3_i)
-    s[compute].compute_at(s[compute], i3_o_i)
+    compute_nn_o_i, compute_nn_i = s[compute].split(compute_nn, factor=1)
+    compute_nn_o_o_i, compute_nn_o_i = s[compute].split(compute_nn_o_i, factor=1)
+    compute_nn_o_o_o_i, compute_nn_o_o_i = s[compute].split(compute_nn_o_o_i, factor=1)
+    compute_nn_o_o_o_o, compute_nn_o_o_o_i = s[compute].split(compute_nn_o_o_o_i, factor=1)
+    compute_ff_o_i, compute_ff_i = s[compute].split(compute_ff, factor=1)
+    compute_ff_o_o_i, compute_ff_o_i = s[compute].split(compute_ff_o_i, factor=1)
+    compute_ff_o_o_o_i, compute_ff_o_o_i = s[compute].split(compute_ff_o_o_i, factor=16)
+    compute_ff_o_o_o_o, compute_ff_o_o_o_i = s[compute].split(compute_ff_o_o_o_i, factor=1)
+    compute_yy_o_i, compute_yy_i = s[compute].split(compute_yy, factor=7)
+    compute_yy_o_o_i, compute_yy_o_i = s[compute].split(compute_yy_o_i, factor=1)
+    compute_yy_o_o_o_i, compute_yy_o_o_i = s[compute].split(compute_yy_o_o_i, factor=1)
+    compute_yy_o_o_o_o, compute_yy_o_o_o_i = s[compute].split(compute_yy_o_o_o_i, factor=1)
+    compute_xx_o_i, compute_xx_i = s[compute].split(compute_xx, factor=1)
+    compute_xx_o_o_i, compute_xx_o_i = s[compute].split(compute_xx_o_i, factor=1)
+    compute_xx_o_o_o_i, compute_xx_o_o_i = s[compute].split(compute_xx_o_o_i, factor=7)
+    compute_xx_o_o_o_o, compute_xx_o_o_o_i = s[compute].split(compute_xx_o_o_o_i, factor=1)
+    compute_rc_o_i, compute_rc_i = s[compute].split(compute_rc, factor=2)
+    compute_rc_o_o, compute_rc_o_i = s[compute].split(compute_rc_o_i, factor=8)
+    compute_ry_o_i, compute_ry_i = s[compute].split(compute_ry, factor=1)
+    compute_ry_o_o, compute_ry_o_i = s[compute].split(compute_ry_o_i, factor=3)
+    compute_rx_o_i, compute_rx_i = s[compute].split(compute_rx, factor=3)
+    compute_rx_o_o, compute_rx_o_i = s[compute].split(compute_rx_o_i, factor=1)
+    s[compute].reorder(compute_nn_o_o_o_o, compute_ff_o_o_o_o, compute_yy_o_o_o_o, compute_xx_o_o_o_o, compute_nn_o_o_o_i, compute_ff_o_o_o_i, compute_yy_o_o_o_i, compute_xx_o_o_o_i, compute_nn_o_o_i, compute_ff_o_o_i, compute_yy_o_o_i, compute_xx_o_o_i, compute_rc_o_o, compute_ry_o_o, compute_rx_o_o, compute_rc_o_i, compute_ry_o_i, compute_rx_o_i, compute_nn_o_i, compute_ff_o_i, compute_yy_o_i, compute_xx_o_i, compute_rc_i, compute_ry_i, compute_rx_i, compute_nn_i, compute_ff_i, compute [...]
+    compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
+    compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
+    compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
+    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=16)
+    compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
+    compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=7)
+    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+    compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
+    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
+    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+    compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
+    s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
+    s[compute].compute_at(s[compute], compute_i3_o_i)
     kernel_shared = s.cache_read(kernel, "shared", [compute])
-    ax0, ax1, ax2, ax3 = tuple(kernel_shared.op.axis)
-    s[kernel_shared].compute_at(s[compute], rx_o_o)
+    kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3 = tuple(kernel_shared.op.axis)
+    s[kernel_shared].compute_at(s[compute], compute_rx_o_o)
     pad_temp_shared = s.cache_read(pad_temp, "shared", [compute])
-    ax0, ax1, ax2, ax3 = tuple(pad_temp_shared.op.axis)
-    s[pad_temp_shared].compute_at(s[compute], rx_o_o)
+    pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3 = tuple(pad_temp_shared.op.axis)
+    s[pad_temp_shared].compute_at(s[compute], compute_rx_o_o)
     s[pad_temp].compute_inline()
-    i0_o_o_o_i1_o_o_o_fused_i2_o_o_o_fused_i3_o_o_o_fused = s[compute].fuse(i0_o_o_o, i1_o_o_o, i2_o_o_o, i3_o_o_o)
-    s[compute].bind(i0_o_o_o_i1_o_o_o_fused_i2_o_o_o_fused_i3_o_o_o_fused, tvm.thread_axis("blockIdx.x"))
-    i0_o_o_i_i1_o_o_i_fused_i2_o_o_i_fused_i3_o_o_i_fused = s[compute].fuse(i0_o_o_i, i1_o_o_i, i2_o_o_i, i3_o_o_i)
-    s[compute].bind(i0_o_o_i_i1_o_o_i_fused_i2_o_o_i_fused_i3_o_o_i_fused, tvm.thread_axis("vthread"))
-    i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused = s[compute].fuse(i0_o_i, i1_o_i, i2_o_i, i3_o_i)
-    s[compute].bind(i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, tvm.thread_axis("threadIdx.x"))
-    ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(ax0, ax1, ax2, ax3)
-    ax0_ax1_fused_ax2_fused_ax3_fused_o, ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(ax0_ax1_fused_ax2_fused_ax3_fused, factor=3)
-    s[kernel_shared].vectorize(ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    ax0_ax1_fused_ax2_fused_ax3_fused_o_o, ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
-    s[kernel_shared].bind(ax0_ax1_fused_ax2_fused_ax3_fused_o_i, tvm.thread_axis("threadIdx.x"))
-    ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(ax0, ax1, ax2, ax3)
-    ax0_ax1_fused_ax2_fused_ax3_fused_o, ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(ax0_ax1_fused_ax2_fused_ax3_fused, factor=9)
-    s[pad_temp_shared].vectorize(ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    ax0_ax1_fused_ax2_fused_ax3_fused_o_o, ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
-    s[pad_temp_shared].bind(ax0_ax1_fused_ax2_fused_ax3_fused_o_i, tvm.thread_axis("threadIdx.x"))
-    s[compute].pragma(nn_o_o_o_o, "auto_unroll_max_step", 1024)
-    s[compute].pragma(nn_o_o_o_o, "unroll_explicit", True)
+    compute_i0_o_o_o_i1_o_o_o_fused_i2_o_o_o_fused_i3_o_o_o_fused = s[compute].fuse(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o)
+    s[compute].bind(compute_i0_o_o_o_i1_o_o_o_fused_i2_o_o_o_fused_i3_o_o_o_fused, te.thread_axis("blockIdx.x"))
+    compute_i0_o_o_i_i1_o_o_i_fused_i2_o_o_i_fused_i3_o_o_i_fused = s[compute].fuse(compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i)
+    s[compute].bind(compute_i0_o_o_i_i1_o_o_i_fused_i2_o_o_i_fused_i3_o_o_i_fused, te.thread_axis("vthread"))
+    compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused = s[compute].fuse(compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i)
+    s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread_axis("threadIdx.x"))
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=6)
+    s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
+    s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=3)
+    s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
+    s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
+    s[compute].pragma(compute_nn_o_o_o_o, "auto_unroll_max_step", 1024)
+    s[compute].pragma(compute_nn_o_o_o_o, "unroll_explicit", True)
 
 
 
@@ -462,7 +677,7 @@ In the example below we resume the status and do more 5 trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  56.708 seconds)
+   **Total running time of the script:** ( 1 minutes  57.640 seconds)
 
 
 .. _sphx_glr_download_tutorials_auto_scheduler_tune_conv2d_layer_cuda.py:
diff --git a/docs/_sources/tutorials/auto_scheduler/tune_matmul_x86.rst.txt b/docs/_sources/tutorials/auto_scheduler/tune_matmul_x86.rst.txt
index bf0709e..e5f6210 100644
--- a/docs/_sources/tutorials/auto_scheduler/tune_matmul_x86.rst.txt
+++ b/docs/_sources/tutorials/auto_scheduler/tune_matmul_x86.rst.txt
@@ -147,7 +147,7 @@ After some measurement trials, it will return the best schedule it found.
 
  .. code-block:: none
 
-    *T*T*T*T*T*T*T*T*T
+    *T*T*T*T*T*T*T*T
 
 
 
@@ -173,83 +173,29 @@ parallelization, vectorization, unrolling and operator fusion.
 
     primfn(A_1: handle, B_1: handle, C_1: handle, out_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
-      buffers = {out: Buffer(out_2: Pointer(float32), float32, [128, 128], []),
-                 C: Buffer(C_2: Pointer(float32), float32, [128, 128], []),
+      buffers = {C: Buffer(C_2: Pointer(float32), float32, [128, 128], []),
+                 out: Buffer(out_2: Pointer(float32), float32, [128, 128], []),
                  B: Buffer(B_2: Pointer(float32), float32, [128, 128], []),
                  A: Buffer(A_2: Pointer(float32), float32, [128, 128], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C, out_1: out} {
       attr [matmul: Pointer(float32)] "storage_scope" = "global";
       allocate(matmul, float32, [16384]) {
-        for (i.outer.outer.inner: int32, 0, 2) {
-          for (j.outer.outer.inner: int32, 0, 16) {
-            for (i.outer.inner.init: int32, 0, 16) {
-              matmul[(((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8))] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 128)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 256)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 384)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 1)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 129)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 257)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 385)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 2)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 130)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 258)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 386)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 3)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 131)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 259)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 387)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 4)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 132)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 260)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 388)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 5)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 133)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 261)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 389)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 6)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 134)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 262)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 390)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 7)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 135)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 263)] = 0f32
-              matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 391)] = 0f32
-            }
-            for (k.outer: int32, 0, 128) {
-              for (i.outer.inner: int32, 0, 16) {
-                matmul[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8))] = ((float32*)matmul[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8))] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[((k.outer*128) + (j.outer.outer.inner*8))]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 128)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 128)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[((k.outer*128) + (j.outer.outer.inner*8))]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 256)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 256)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[((k.outer*128) + (j.outer.outer.inner*8))]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 384)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 384)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[((k.outer*128) + (j.outer.outer.inner*8))]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 1)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 1)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 1)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 129)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 129)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 1)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 257)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 257)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 1)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 385)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 385)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 1)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 2)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 2)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 2)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 130)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 130)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 2)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 258)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 258)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 2)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 386)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 386)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 2)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 3)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 3)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 3)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 131)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 131)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 3)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 259)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 259)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 3)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 387)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 387)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 3)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 4)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 4)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 4)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 132)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 132)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 4)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 260)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 260)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 4)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 388)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 388)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 4)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 5)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 5)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 5)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 133)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 133)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 5)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 261)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 261)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 5)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 389)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 389)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 5)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 6)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 6)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 6)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 134)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 134)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 6)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 262)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 262)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 6)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 390)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 390)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 6)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 7)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 7)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 7)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 135)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 135)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 7)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 263)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 263)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 7)]))
-                matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 391)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 391)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 7)]))
+        for (i.outer.outer.inner: int32, 0, 64) {
+          for (j.outer.outer.inner: int32, 0, 4) {
+            matmul[ramp(((i.outer.outer.inner*256) + (j.outer.outer.inner*32)), 1, 8)] = broadcast(0f32, 8)
+            matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 128), 1, 8)] = broadcast(0f32, 8)
+            matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 8), 1, 8)] = broadcast(0f32, 8)
+            matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 136), 1, 8)] = broadcast(0f32, 8)
+            matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 16), 1, 8)] = broadcast(0f32, 8)
+            matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 144), 1, 8)] = broadcast(0f32, 8)
+            matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 24), 1, 8)] = broadcast(0f32, 8)
+            matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 152), 1, 8)] = broadcast(0f32, 8)
+            for (k.outer: int32, 0, 8) {
+              for (j.outer.inner: int32, 0, 4) {
+                for (k.inner: int32, 0, 16) {
+                  matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + (j.outer.inner*8)), 1, 8)] = ((float32x8*)matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + (j.outer.inner*8)), 1, 8)] + (broadcast((float32*)A_2[(((i.outer.outer.inner*256) + (k.outer*16)) + k.inner)], 8)*(float32x8*)B_2[ramp(((((k.outer*2048) + (k.inner*128)) + (j.outer.outer.inner*32)) + (j.outer.inner*8)), 1, 8)]))
+                  matmul[ramp(((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + (j.outer.inner*8)) + 128), 1, 8)] = ((float32x8*)matmul[ramp(((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + (j.outer.inner*8)) + 128), 1, 8)] + (broadcast((float32*)A_2[((((i.outer.outer.inner*256) + (k.outer*16)) + k.inner) + 128)], 8)*(float32x8*)B_2[ramp(((((k.outer*2048) + (k.inner*128)) + (j.outer.outer.inner*32)) + (j.outer.inner*8)), 1, 8)]))
+                }
               }
             }
           }
@@ -308,7 +254,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.345 ms
+    Execution time of this operator: 0.130 ms
 
 
 
@@ -349,25 +295,25 @@ print the equivalent python schedule API, and build the binary again.
  .. code-block:: none
 
     Equivalent python schedule:
-    i, j, k = tuple(matmul.op.axis) + tuple(matmul.op.reduce_axis)
-    i, j = tuple(out.op.axis) + tuple(out.op.reduce_axis)
-    i_o_i, i_i = s[matmul].split(i, factor=1)
-    i_o_o_i, i_o_i = s[matmul].split(i_o_i, factor=128)
-    i_o_o_o, i_o_o_i = s[matmul].split(i_o_o_i, factor=1)
-    j_o_i, j_i = s[matmul].split(j, factor=4)
-    j_o_o_i, j_o_i = s[matmul].split(j_o_i, factor=8)
-    j_o_o_o, j_o_o_i = s[matmul].split(j_o_o_i, factor=4)
-    k_o, k_i = s[matmul].split(k, factor=4)
-    s[matmul].reorder(i_o_o_o, j_o_o_o, i_o_o_i, j_o_o_i, k_o, i_o_i, j_o_i, k_i, i_i, j_i)
-    i_o, i_i = s[out].split(i, factor=128)
-    j_o, j_i = s[out].split(j, factor=128)
-    s[out].reorder(i_o, j_o, i_i, j_i)
-    s[matmul].compute_at(s[out], j_o)
-    i_o_j_o_fused = s[out].fuse(i_o, j_o)
-    s[out].parallel(i_o_j_o_fused)
-    s[matmul].pragma(i_o_o_o, "auto_unroll_max_step", 64)
-    s[matmul].pragma(i_o_o_o, "unroll_explicit", True)
-    s[matmul].vectorize(j_i)
+    matmul_i, matmul_j, matmul_k = tuple(matmul.op.axis) + tuple(matmul.op.reduce_axis)
+    out_i, out_j = tuple(out.op.axis) + tuple(out.op.reduce_axis)
+    matmul_i_o_i, matmul_i_i = s[matmul].split(matmul_i, factor=8)
+    matmul_i_o_o_i, matmul_i_o_i = s[matmul].split(matmul_i_o_i, factor=16)
+    matmul_i_o_o_o, matmul_i_o_o_i = s[matmul].split(matmul_i_o_o_i, factor=1)
+    matmul_j_o_i, matmul_j_i = s[matmul].split(matmul_j, factor=1)
+    matmul_j_o_o_i, matmul_j_o_i = s[matmul].split(matmul_j_o_i, factor=128)
+    matmul_j_o_o_o, matmul_j_o_o_i = s[matmul].split(matmul_j_o_o_i, factor=1)
+    matmul_k_o, matmul_k_i = s[matmul].split(matmul_k, factor=4)
+    s[matmul].reorder(matmul_i_o_o_o, matmul_j_o_o_o, matmul_i_o_o_i, matmul_j_o_o_i, matmul_k_o, matmul_i_o_i, matmul_j_o_i, matmul_k_i, matmul_i_i, matmul_j_i)
+    out_i_o, out_i_i = s[out].split(out_i, factor=128)
+    out_j_o, out_j_i = s[out].split(out_j, factor=128)
+    s[out].reorder(out_i_o, out_j_o, out_i_i, out_j_i)
+    s[matmul].compute_at(s[out], out_j_o)
+    out_i_o_j_o_fused = s[out].fuse(out_i_o, out_j_o)
+    s[out].parallel(out_i_o_j_o_fused)
+    s[matmul].pragma(matmul_i_o_o_o, "auto_unroll_max_step", 512)
+    s[matmul].pragma(matmul_i_o_o_o, "unroll_explicit", True)
+    s[matmul].vectorize(matmul_j_i)
 
 
 
@@ -422,7 +368,7 @@ In the example below we resume the status and do more 5 trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  46.171 seconds)
+   **Total running time of the script:** ( 1 minutes  35.283 seconds)
 
 
 .. _sphx_glr_download_tutorials_auto_scheduler_tune_matmul_x86.py:
diff --git a/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt
index 11a315f..20b777d 100644
--- a/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,11 +5,11 @@
 
 Computation times
 =================
-**01:15.723** total execution time for **tutorials_autotvm** files:
-
-- **00:48.468**: :ref:`sphx_glr_tutorials_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
-- **00:26.623**: :ref:`sphx_glr_tutorials_autotvm_tune_simple_template.py` (``tune_simple_template.py``)
-- **00:00.184**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
-- **00:00.157**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
-- **00:00.147**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
-- **00:00.145**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
+**01:18.464** total execution time for **tutorials_autotvm** files:
+
+- **00:54.966**: :ref:`sphx_glr_tutorials_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)
+- **00:22.896**: :ref:`sphx_glr_tutorials_autotvm_tune_simple_template.py` (``tune_simple_template.py``)
+- **00:00.160**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)
+- **00:00.155**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)
+- **00:00.144**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``)
+- **00:00.143**: :ref:`sphx_glr_tutorials_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)
diff --git a/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt
index 245d75a..12f1959 100644
--- a/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/tutorials/autotvm/tune_conv2d_cuda.rst.txt
@@ -238,26 +238,26 @@ for this template
        7 unroll_explicit: OtherOption([0, 1]) len=2
     )
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 29.89/29.89     result: MeasureResult(costs=(0.007745029785714286,), error_no=0, all_cost=3.028876304626465, timestamp=1601335316.9973547)      [('tile_f', [-1, 32, 1, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7166780
-    No: 2   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 3   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 4   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 5   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 6   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 7   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 8   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 9   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 10  GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 11  GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 12  GFLOPS: 49.33/49.33     result: MeasureResult(costs=(0.004693308181818182,), error_no=0, all_cost=3.1705777645111084, timestamp=1601335327.987154)      [('tile_f', [-1, 2, 8, 2]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2077980
-    No: 13  GFLOPS: 0.00/49.33      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 14  GFLOPS: 73.80/73.80     result: MeasureResult(costs=(0.00313695178125,), error_no=0, all_cost=2.184481620788574, timestamp=1601335329.6290433)  [('tile_f', [-1, 2, 16, 8]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,8726459
-    No: 15  GFLOPS: 27.60/73.80     result: MeasureResult(costs=(0.008387928916666667,), error_no=0, all_cost=2.155811071395874, timestamp=1601335330.810771)       [('tile_f', [-1, 1, 2, 64]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 8]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5905444
-    No: 16  GFLOPS: 1.61/73.80      result: MeasureResult(costs=(0.143430149,), error_no=0, all_cost=4.961726427078247, timestamp=1601335333.5001879)       [('tile_f', [-1, 2, 8, 8]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 2, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7428895
-    No: 17  GFLOPS: 0.00/73.80      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
-    No: 18  GFLOPS: 0.00/73.80      result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (4) /workspace/build/libtvm.so(+0x11df8a2) [0x7f169b27b8a2]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f169b27ce9b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::Call [...]
-    No: 19  GFLOPS: 23.76/73.80     result: MeasureResult(costs=(0.009743294818181819,), error_no=0, all_cost=1.9935996532440186, timestamp=1601335342.6303432)     [('tile_f', [-1, 2, 1, 32]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,782066
-    No: 20  GFLOPS: 0.00/73.80      result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (4) /workspace/build/libtvm.so(+0x11df8a2) [0x7f169b27b8a2]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f169b27ce9b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::Call [...]
+    No: 1   GFLOPS: 36.63/36.63     result: MeasureResult(costs=(0.006320022761904762,), error_no=0, all_cost=3.6816139221191406, timestamp=1601668684.1177537)     [('tile_f', [-1, 32, 1, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7166780
+    No: 2   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 3   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 4   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 5   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 6   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 7   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 8   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 9   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 10  GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 11  GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 12  GFLOPS: 0.00/36.63      result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMArrayAlloc+0xe2) [0x7f8cae07a9d2]\n  [bt] (4) /workspace/build/libtvm.so(tvm::runtime::NDArray::Empty(std::vector<long, std::allocator<long> >, DLDataType, DLContext)+0x206) [0x7f8cae07a836]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCDeviceAPI::AllocDataSpace(DLContext, unsigned long, unsigned long, DLDataType)+0xb8f) [0x [...]
+    No: 13  GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 14  GFLOPS: 83.32/83.32     result: MeasureResult(costs=(0.0027784159811320755,), error_no=0, all_cost=1.993711233139038, timestamp=1601668696.6355245)     [('tile_f', [-1, 2, 16, 8]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,8726459
+    No: 15  GFLOPS: 35.61/83.32     result: MeasureResult(costs=(0.0065010153125,), error_no=0, all_cost=1.6496946811676025, timestamp=1601668697.651509)   [('tile_f', [-1, 1, 2, 64]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 8]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5905444
+    No: 16  GFLOPS: 0.00/83.32      result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (4) /workspace/build/libtvm.so(+0x11dd342) [0x7f8cae0e1342]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f8cae0e292b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::Call [...]
+    No: 17  GFLOPS: 0.00/83.32      result: MeasureResult(costs=(InstantiationError('Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::PrimFunc [...]
+    No: 18  GFLOPS: 0.00/83.32      result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (4) /workspace/build/libtvm.so(+0x11dd342) [0x7f8cae0e1342]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f8cae0e292b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::Call [...]
+    No: 19  GFLOPS: 29.61/83.32     result: MeasureResult(costs=(0.00781925976923077,), error_no=0, all_cost=1.381317377090454, timestamp=1601668714.405927)        [('tile_f', [-1, 2, 1, 32]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,782066
+    No: 20  GFLOPS: 0.00/83.32      result: MeasureResult(costs=(RuntimeError('Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (4) /workspace/build/libtvm.so(+0x11dd342) [0x7f8cae0e1342]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f8cae0e292b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::Call [...]
 
 
 
@@ -310,7 +310,7 @@ and measure running time.
 
     Best config:
     [('tile_f', [-1, 2, 16, 8]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,8726459
-    Time cost of this operator: 0.003474
+    Time cost of this operator: 0.002609
 
 
 
diff --git a/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt b/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt
index ef6c748..c96bb60 100644
--- a/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt
+++ b/docs/_sources/tutorials/autotvm/tune_simple_template.rst.txt
@@ -365,16 +365,16 @@ used to get the best config later.
  .. code-block:: none
 
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 9.95/9.95       result: MeasureResult(costs=(0.0269896166,), error_no=0, all_cost=0.985429048538208, timestamp=1601335288.7511094)      [('tile_y', [-1, 8]), ('tile_x', [-1, 32])],None,53
-    No: 2   GFLOPS: 12.65/12.65     result: MeasureResult(costs=(0.021222210999999998,), error_no=0, all_cost=1.316232442855835, timestamp=1601335289.859437)       [('tile_y', [-1, 128]), ('tile_x', [-1, 256])],None,87
-    No: 3   GFLOPS: 15.51/15.51     result: MeasureResult(costs=(0.0173104734,), error_no=0, all_cost=1.3434505462646484, timestamp=1601335290.9338927)     [('tile_y', [-1, 8]), ('tile_x', [-1, 512])],None,93
-    No: 4   GFLOPS: 13.09/15.51     result: MeasureResult(costs=(0.0205033932,), error_no=0, all_cost=1.124967098236084, timestamp=1601335292.0556455)      [('tile_y', [-1, 128]), ('tile_x', [-1, 512])],None,97
-    No: 5   GFLOPS: 2.09/15.51      result: MeasureResult(costs=(0.1285110386,), error_no=0, all_cost=2.9166505336761475, timestamp=1601335294.8791656)     [('tile_y', [-1, 256]), ('tile_x', [-1, 4])],None,28
-    No: 6   GFLOPS: 9.10/15.51      result: MeasureResult(costs=(0.0294933192,), error_no=0, all_cost=1.184211254119873, timestamp=1601335296.1353078)      [('tile_y', [-1, 4]), ('tile_x', [-1, 32])],None,52
-    No: 7   GFLOPS: 12.65/15.51     result: MeasureResult(costs=(0.0212155018,), error_no=0, all_cost=1.0387556552886963, timestamp=1601335297.3753889)     [('tile_y', [-1, 2]), ('tile_x', [-1, 512])],None,91
-    No: 8   GFLOPS: 10.79/15.51     result: MeasureResult(costs=(0.024885154200000002,), error_no=0, all_cost=1.033494234085083, timestamp=1601335298.574376)       [('tile_y', [-1, 2]), ('tile_x', [-1, 256])],None,81
-    No: 9   GFLOPS: 0.92/15.51      result: MeasureResult(costs=(0.2920148466,), error_no=0, all_cost=5.719310760498047, timestamp=1601335305.6552417)      [('tile_y', [-1, 128]), ('tile_x', [-1, 2])],None,17
-    No: 10  GFLOPS: 1.18/15.51      result: MeasureResult(costs=(0.2275096714,), error_no=0, all_cost=4.696672439575195, timestamp=1601335310.067456)       [('tile_y', [-1, 1]), ('tile_x', [-1, 2])],None,10
+    No: 1   GFLOPS: 4.53/4.53       result: MeasureResult(costs=(0.0592549474,), error_no=0, all_cost=1.813521385192871, timestamp=1601668657.3705232)      [('tile_y', [-1, 8]), ('tile_x', [-1, 32])],None,53
+    No: 2   GFLOPS: 16.86/16.86     result: MeasureResult(costs=(0.0159222514,), error_no=0, all_cost=1.2602179050445557, timestamp=1601668658.325927)      [('tile_y', [-1, 128]), ('tile_x', [-1, 256])],None,87
+    No: 3   GFLOPS: 13.18/16.86     result: MeasureResult(costs=(0.0203596778,), error_no=0, all_cost=1.2592532634735107, timestamp=1601668659.301608)      [('tile_y', [-1, 8]), ('tile_x', [-1, 512])],None,93
+    No: 4   GFLOPS: 12.46/16.86     result: MeasureResult(costs=(0.0215495506,), error_no=0, all_cost=1.2881958484649658, timestamp=1601668660.275171)      [('tile_y', [-1, 128]), ('tile_x', [-1, 512])],None,97
+    No: 5   GFLOPS: 2.87/16.86      result: MeasureResult(costs=(0.0934988552,), error_no=0, all_cost=2.17838454246521, timestamp=1601668662.29342) [('tile_y', [-1, 256]), ('tile_x', [-1, 4])],None,28
+    No: 6   GFLOPS: 5.08/16.86      result: MeasureResult(costs=(0.0528449238,), error_no=0, all_cost=1.7616806030273438, timestamp=1601668663.805897)      [('tile_y', [-1, 4]), ('tile_x', [-1, 32])],None,52
+    No: 7   GFLOPS: 10.76/16.86     result: MeasureResult(costs=(0.024955729,), error_no=0, all_cost=1.1567623615264893, timestamp=1601668664.8381867)      [('tile_y', [-1, 2]), ('tile_x', [-1, 512])],None,91
+    No: 8   GFLOPS: 13.48/16.86     result: MeasureResult(costs=(0.0199137338,), error_no=0, all_cost=1.1330161094665527, timestamp=1601668665.7802334)     [('tile_y', [-1, 2]), ('tile_x', [-1, 256])],None,81
+    No: 9   GFLOPS: 1.33/16.86      result: MeasureResult(costs=(0.2017065944,), error_no=0, all_cost=4.308288097381592, timestamp=1601668669.9560215)      [('tile_y', [-1, 128]), ('tile_x', [-1, 2])],None,17
+    No: 10  GFLOPS: 1.37/16.86      result: MeasureResult(costs=(0.1966458516,), error_no=0, all_cost=3.632107973098755, timestamp=1601668673.6803129)      [('tile_y', [-1, 1]), ('tile_x', [-1, 2])],None,10
 
 
 
diff --git a/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt b/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt
index 7543617..d0dcd93 100644
--- a/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt
+++ b/docs/_sources/tutorials/dev/low_level_custom_pass.rst.txt
@@ -74,8 +74,8 @@ our customized lowering pass to manipulate the IR directly instead of using sche
 
     primfn(a_1: handle, b_1: handle, c_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
-      buffers = {b: Buffer(b_2: Pointer(float32), float32, [128], []),
-                 c: Buffer(c_2: Pointer(float32), float32, [128], []),
+      buffers = {c: Buffer(c_2: Pointer(float32), float32, [128], []),
+                 b: Buffer(b_2: Pointer(float32), float32, [128], []),
                  a: Buffer(a_2: Pointer(float32), float32, [128], [])}
       buffer_map = {a_1: a, b_1: b, c_1: c} {
       for (i: int32, 0, 128) {
diff --git a/docs/_sources/tutorials/dev/sg_execution_times.rst.txt b/docs/_sources/tutorials/dev/sg_execution_times.rst.txt
index 4ece804..b5eed06 100644
--- a/docs/_sources/tutorials/dev/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/dev/sg_execution_times.rst.txt
@@ -5,8 +5,8 @@
 
 Computation times
 =================
-**00:32.460** total execution time for **tutorials_dev** files:
+**00:29.319** total execution time for **tutorials_dev** files:
 
-- **00:31.830**: :ref:`sphx_glr_tutorials_dev_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``)
-- **00:00.442**: :ref:`sphx_glr_tutorials_dev_use_pass_infra.py` (``use_pass_infra.py``)
-- **00:00.189**: :ref:`sphx_glr_tutorials_dev_low_level_custom_pass.py` (``low_level_custom_pass.py``)
+- **00:28.796**: :ref:`sphx_glr_tutorials_dev_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``)
+- **00:00.362**: :ref:`sphx_glr_tutorials_dev_use_pass_infra.py` (``use_pass_infra.py``)
+- **00:00.161**: :ref:`sphx_glr_tutorials_dev_low_level_custom_pass.py` (``low_level_custom_pass.py``)
diff --git a/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt b/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt
index 6b16529..a400aeb 100644
--- a/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_model_on_android.rst.txt
@@ -421,7 +421,7 @@ Execute on TVM
 
     TVM prediction top-1: tiger cat
     Evaluate inference time cost...
-    Mean inference time (std dev): 25.35 ms (0.05 ms)
+    Mean inference time (std dev): 16.55 ms (1.74 ms)
 
 
 
diff --git a/docs/_sources/tutorials/frontend/deploy_object_detection_pytorch.rst.txt b/docs/_sources/tutorials/frontend/deploy_object_detection_pytorch.rst.txt
index 04907a6..1c40b2e 100644
--- a/docs/_sources/tutorials/frontend/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_object_detection_pytorch.rst.txt
@@ -237,7 +237,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  50.257 seconds)
+   **Total running time of the script:** ( 1 minutes  35.114 seconds)
 
 
 .. _sphx_glr_download_tutorials_frontend_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt b/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt
index 1a8357e..e17044c 100644
--- a/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_prequantized.rst.txt
@@ -321,7 +321,7 @@ output values are identical out of 1000 outputs from mobilenet v2.
 
  .. code-block:: none
 
-    196 in 1000 raw floating outputs identical.
+    188 in 1000 raw floating outputs identical.
 
 
 
@@ -348,7 +348,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
  .. code-block:: none
 
-    Elapsed average ms: 20.04376112
+    Elapsed average ms: 21.65541768
 
 
 
diff --git a/docs/_sources/tutorials/frontend/deploy_prequantized_tflite.rst.txt b/docs/_sources/tutorials/frontend/deploy_prequantized_tflite.rst.txt
index 07e0f28..1b3ee88 100644
--- a/docs/_sources/tutorials/frontend/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_prequantized_tflite.rst.txt
@@ -368,7 +368,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
  .. code-block:: none
 
-    Elapsed average ms: 36.152443829999996
+    Elapsed average ms: 33.15524846
 
 
 
@@ -401,7 +401,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  36.780 seconds)
+   **Total running time of the script:** ( 2 minutes  16.810 seconds)
 
 
 .. _sphx_glr_download_tutorials_frontend_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt b/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt
index c3339b5..a09a847 100644
--- a/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/tutorials/frontend/deploy_ssd_gluoncv.rst.txt
@@ -319,7 +319,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  53.859 seconds)
+   **Total running time of the script:** ( 1 minutes  26.063 seconds)
 
 
 .. _sphx_glr_download_tutorials_frontend_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/tutorials/frontend/from_onnx.rst.txt b/docs/_sources/tutorials/frontend/from_onnx.rst.txt
index af04a47..3b76bd9 100644
--- a/docs/_sources/tutorials/frontend/from_onnx.rst.txt
+++ b/docs/_sources/tutorials/frontend/from_onnx.rst.txt
@@ -156,7 +156,7 @@ Execute on TVM
 
  .. code-block:: none
 
-
    ...47%, 0.01 MB, 39 KB/s, 0 seconds passed
    ...94%, 0.02 MB, 76 KB/s, 0 seconds passed
    ...100%, 0.02 MB, 115 KB/s, 0 seconds passed
+
    ...47%, 0.01 MB, 93 KB/s, 0 seconds passed
    ...94%, 0.02 MB, 186 KB/s, 0 seconds passed
    ...100%, 0.02 MB, 278 KB/s, 0 seconds passed
     Cannot find config for target=llvm -keys=cpu, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 32, 224, 224), 'float32'), ('TENSOR', (9, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm -keys=cpu, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 64, 224, 224), 'float32'), ('TENSOR', (32, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
     Cannot find config for target=llvm -keys=cpu, workload=('conv2d_NCHWc.x86', ('TENSOR', (1, 1, 224, 224), 'float32'), ('TENSOR', (64, 1, 5, 5), 'float32'), (1, 1), (2, 2, 2, 2), (1, 1), 'NCHW', 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.
diff --git a/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt
index dd6d9b7..5c63bad 100644
--- a/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,24 +5,24 @@
 
 Computation times
 =================
-**10:37.661** total execution time for **tutorials_frontend** files:
+**08:58.956** total execution time for **tutorials_frontend** files:
 
-- **02:36.780**: :ref:`sphx_glr_tutorials_frontend_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)
-- **01:53.859**: :ref:`sphx_glr_tutorials_frontend_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
-- **01:50.257**: :ref:`sphx_glr_tutorials_frontend_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``)
-- **00:39.325**: :ref:`sphx_glr_tutorials_frontend_deploy_prequantized.py` (``deploy_prequantized.py``)
-- **00:37.911**: :ref:`sphx_glr_tutorials_frontend_from_tensorflow.py` (``from_tensorflow.py``)
-- **00:31.934**: :ref:`sphx_glr_tutorials_frontend_deploy_quantized.py` (``deploy_quantized.py``)
-- **00:26.331**: :ref:`sphx_glr_tutorials_frontend_from_tflite.py` (``from_tflite.py``)
-- **00:22.983**: :ref:`sphx_glr_tutorials_frontend_from_darknet.py` (``from_darknet.py``)
-- **00:16.754**: :ref:`sphx_glr_tutorials_frontend_from_caffe2.py` (``from_caffe2.py``)
-- **00:15.161**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
-- **00:14.067**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_android.py` (``deploy_model_on_android.py``)
-- **00:11.641**: :ref:`sphx_glr_tutorials_frontend_from_keras.py` (``from_keras.py``)
-- **00:11.624**: :ref:`sphx_glr_tutorials_frontend_from_pytorch.py` (``from_pytorch.py``)
-- **00:09.558**: :ref:`sphx_glr_tutorials_frontend_from_coreml.py` (``from_coreml.py``)
-- **00:08.824**: :ref:`sphx_glr_tutorials_frontend_from_mxnet.py` (``from_mxnet.py``)
-- **00:05.608**: :ref:`sphx_glr_tutorials_frontend_build_gcn.py` (``build_gcn.py``)
-- **00:02.880**: :ref:`sphx_glr_tutorials_frontend_using_external_lib.py` (``using_external_lib.py``)
-- **00:01.993**: :ref:`sphx_glr_tutorials_frontend_from_onnx.py` (``from_onnx.py``)
-- **00:00.173**: :ref:`sphx_glr_tutorials_frontend_deploy_sparse.py` (``deploy_sparse.py``)
+- **02:16.810**: :ref:`sphx_glr_tutorials_frontend_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)
+- **01:35.114**: :ref:`sphx_glr_tutorials_frontend_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``)
+- **01:26.063**: :ref:`sphx_glr_tutorials_frontend_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)
+- **00:36.010**: :ref:`sphx_glr_tutorials_frontend_deploy_prequantized.py` (``deploy_prequantized.py``)
+- **00:29.040**: :ref:`sphx_glr_tutorials_frontend_from_tensorflow.py` (``from_tensorflow.py``)
+- **00:24.832**: :ref:`sphx_glr_tutorials_frontend_deploy_quantized.py` (``deploy_quantized.py``)
+- **00:21.702**: :ref:`sphx_glr_tutorials_frontend_from_tflite.py` (``from_tflite.py``)
+- **00:20.680**: :ref:`sphx_glr_tutorials_frontend_from_darknet.py` (``from_darknet.py``)
+- **00:16.347**: :ref:`sphx_glr_tutorials_frontend_from_caffe2.py` (``from_caffe2.py``)
+- **00:13.192**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)
+- **00:12.357**: :ref:`sphx_glr_tutorials_frontend_deploy_model_on_android.py` (``deploy_model_on_android.py``)
+- **00:10.381**: :ref:`sphx_glr_tutorials_frontend_from_pytorch.py` (``from_pytorch.py``)
+- **00:10.004**: :ref:`sphx_glr_tutorials_frontend_from_keras.py` (``from_keras.py``)
+- **00:09.348**: :ref:`sphx_glr_tutorials_frontend_from_coreml.py` (``from_coreml.py``)
+- **00:08.209**: :ref:`sphx_glr_tutorials_frontend_from_mxnet.py` (``from_mxnet.py``)
+- **00:04.892**: :ref:`sphx_glr_tutorials_frontend_build_gcn.py` (``build_gcn.py``)
+- **00:02.176**: :ref:`sphx_glr_tutorials_frontend_using_external_lib.py` (``using_external_lib.py``)
+- **00:01.644**: :ref:`sphx_glr_tutorials_frontend_from_onnx.py` (``from_onnx.py``)
+- **00:00.155**: :ref:`sphx_glr_tutorials_frontend_deploy_sparse.py` (``deploy_sparse.py``)
diff --git a/docs/_sources/tutorials/get_started/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorials/get_started/cross_compilation_and_rpc.rst.txt
index cd7929b..0ccf610 100644
--- a/docs/_sources/tutorials/get_started/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorials/get_started/cross_compilation_and_rpc.rst.txt
@@ -235,7 +235,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.213e-07 secs/op
+    1.794e-07 secs/op
 
 
 
diff --git a/docs/_sources/tutorials/get_started/relay_quick_start.rst.txt b/docs/_sources/tutorials/get_started/relay_quick_start.rst.txt
index c25f168..f2d7fc3 100644
--- a/docs/_sources/tutorials/get_started/relay_quick_start.rst.txt
+++ b/docs/_sources/tutorials/get_started/relay_quick_start.rst.txt
@@ -224,7 +224,7 @@ in this example. Then the machine code will be generated as the module library.
 
  .. code-block:: none
 
-
    ...1%, 0.01 MB, 68 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 127 KB/s, 0 seconds passed
    ...5%, 0.02 MB, 190 KB/s, 0 seconds passed
    ...7%, 0.03 MB, 251 KB/s, 0 seconds passed
    ...9%, 0.04 MB, 312 KB/s, 0 seconds passed
    ...11%, 0.05 MB, 355 KB/s, 0 seconds passed
    ...13%, 0.05 MB, 411 KB/s, 0 seconds passed
    ...15%, 0.06 MB, 467 KB/s, 0 seconds passed
    ...17%, 0.07 MB, 525 KB/s, 0 seconds passed
    ...19%, 0.08 MB, 580 KB/s, 0 seconds passed
    ...21%, 0.09 MB, 634 KB/s, 0 seconds passed
    ...23%, 0.09 MB, 670 KB/s, 0 seconds passed
    ...25%, 0.10 MB, 721 KB/s, 0 seconds passed
    ...27%, 0.11 MB, 775 KB/s, 0 seconds passed
    ...29%, 0.12 MB, 830 KB/s, 0 seconds passed
    ...31%, 0.12 MB, 879 KB/s, 0 seconds passed
    ...33%, 0.13 MB, 934 KB/s, 0 seconds passed
    ...35%, 0.14 MB, 983 KB/s, 0 seconds passed
    ...37%, 0.15 MB, 1037 KB/s, 0 seconds passed
    ...39%, 0.16 MB, 1086 KB/s, 0 seconds passed
   ...41%, 0.16 MB, 1140 KB/s, 0 seconds passed
    ...43%, 0.17 MB, 1186 KB/s, 0 seconds passed
    ...45%, 0.18 MB, 1239 KB/s, 0 seconds passed
    ...47%, 0.19 MB, 1292 KB/s, 0 seconds passed
    ...49%, 0.20 MB, 1315 KB/s, 0 seconds passed
    ...51%, 0.20 MB, 1367 KB/s, 0 seconds passed
    ...53%, 0.21 MB, 1412 KB/s, 0 seconds passed
    ...55%, 0.22 MB, 1463 KB/s, 0 seconds passed
    ...57%, 0.23 MB, 1511 KB/s, 0 seconds passed
    ...59%, 0.23 MB, 1563 KB/s, 0 seconds passed
    ...61%, 0.24 MB, 1614 KB/s, 0 seconds passed
    ...63%, 0.25 MB, 1665 KB/s, 0 seconds passed
    ...65%, 0.26 MB, 1700 KB/s, 0 seconds passed
    ...67%, 0.27 MB, 1750 KB/s, 0 seconds passed
    ...69%, 0.27 MB, 1801 KB/s, 0 seconds passed
    ...71%, 0.28 MB, 1852 KB/s, 0 seconds passed
    ...73%, 0.29 MB, 1896 KB/s, 0 seconds passed
    ...75%, 0.30 MB, 1946 KB/s, 0 seconds passed
    ...77%, 0.30 MB, 1996 KB/s, 0 seconds passed
    ...79%, 0.31 MB, 2047 KB/s, 0 seconds passed
    ...81%, 0.32 MB, 2087 KB/s, 0 seconds passed
   ...83%, 0.33 MB, 2137 KB/s, 0 seconds passed
    ...85%, 0.34 MB, 2187 KB/s, 0 seconds passed
    ...87%, 0.34 MB, 2237 KB/s, 0 seconds passed
    ...89%, 0.35 MB, 2281 KB/s, 0 seconds passed
    ...91%, 0.36 MB, 2330 KB/s, 0 seconds passed
    ...93%, 0.37 MB, 2337 KB/s, 0 seconds passed
    ...95%, 0.38 MB, 2386 KB/s, 0 seconds passed
    ...97%, 0.38 MB, 2434 KB/s, 0 seconds passed
    ...99%, 0.39 MB, 2483 KB/s, 0 seconds passed
    ...100%, 0.40 MB, 2530 KB/s, 0 seconds passed
+
    ...1%, 0.01 MB, 444 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 864 KB/s, 0 seconds passed
    ...5%, 0.02 MB, 1282 KB/s, 0 seconds passed
    ...7%, 0.03 MB, 1677 KB/s, 0 seconds passed
    ...9%, 0.04 MB, 2034 KB/s, 0 seconds passed
    ...11%, 0.05 MB, 2415 KB/s, 0 seconds passed
    ...13%, 0.05 MB, 2741 KB/s, 0 seconds passed
    ...15%, 0.06 MB, 3074 KB/s, 0 seconds passed
    ...17%, 0.07 MB, 3426 KB/s, 0 seconds passed
    ...19%, 0.08 MB, 3739 KB/s, 0 seconds passed
    ...21%, 0.09 MB, 4036 KB/s, 0 seconds passed
    ...23%, 0.09 MB, 4327 KB/s, 0 seconds passed
    ...25%, 0.10 MB, 4608 KB/s, 0 seconds passed
    ...27%, 0.11 MB, 4839 KB/s, 0 seconds passed
    ...29%, 0.12 MB, 5145 KB/s, 0 seconds passed
    ...31%, 0.12 MB, 5446 KB/s, 0 seconds passed
    ...33%, 0.13 MB, 5729 KB/s, 0 seconds passed
    ...35%, 0.14 MB, 5925 KB/s, 0 seconds passed
    ...37%, 0.15 MB, 6217 KB/s, 0 seconds passed
    ...39%, 0.16 MB, 6449 KB/s, 0 seconds passed
   ...41%, 0.16 MB, 6729 KB/s, 0 seconds passed
    ...43%, 0.17 MB, 6941 KB/s, 0 seconds passed
    ...45%, 0.18 MB, 7146 KB/s, 0 seconds passed
    ...47%, 0.19 MB, 7398 KB/s, 0 seconds passed
    ...49%, 0.20 MB, 7621 KB/s, 0 seconds passed
    ...51%, 0.20 MB, 7858 KB/s, 0 seconds passed
    ...53%, 0.21 MB, 8041 KB/s, 0 seconds passed
    ...55%, 0.22 MB, 8286 KB/s, 0 seconds passed
    ...57%, 0.23 MB, 8538 KB/s, 0 seconds passed
    ...59%, 0.23 MB, 8581 KB/s, 0 seconds passed
    ...61%, 0.24 MB, 8806 KB/s, 0 seconds passed
    ...63%, 0.25 MB, 9045 KB/s, 0 seconds passed
    ...65%, 0.26 MB, 9296 KB/s, 0 seconds passed
    ...67%, 0.27 MB, 9351 KB/s, 0 seconds passed
    ...69%, 0.27 MB, 9589 KB/s, 0 seconds passed
    ...71%, 0.28 MB, 9819 KB/s, 0 seconds passed
    ...73%, 0.29 MB, 10055 KB/s, 0 seconds passed
    ...75%, 0.30 MB, 10184 KB/s, 0 seconds passed
    ...77%, 0.30 MB, 10416 KB/s, 0 seconds passed
    ...79%, 0.31 MB, 10584 KB/s, 0 seconds passed
   ...81%, 0.32 MB, 10814 KB/s, 0 seconds passed
    ...83%, 0.33 MB, 10794 KB/s, 0 seconds passed
    ...85%, 0.34 MB, 11013 KB/s, 0 seconds passed
    ...87%, 0.34 MB, 11200 KB/s, 0 seconds passed
    ...89%, 0.35 MB, 11413 KB/s, 0 seconds passed
    ...91%, 0.36 MB, 11477 KB/s, 0 seconds passed
    ...93%, 0.37 MB, 11677 KB/s, 0 seconds passed
    ...95%, 0.38 MB, 11878 KB/s, 0 seconds passed
    ...97%, 0.38 MB, 12085 KB/s, 0 seconds passed
    ...99%, 0.39 MB, 12269 KB/s, 0 seconds passed
    ...100%, 0.40 MB, 12421 KB/s, 0 seconds passed
     Cannot find config for target=cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32, workload=('dense_small_batch.cuda', ('TENSOR', (1, 512), 'float32'), ('TENSOR', (1000, 512), 'float32'), None, 'float32'). A fallback configuration is used, which may bring great performance regression.
 
 
diff --git a/docs/_sources/tutorials/get_started/sg_execution_times.rst.txt b/docs/_sources/tutorials/get_started/sg_execution_times.rst.txt
index 2547245..cdc7741 100644
--- a/docs/_sources/tutorials/get_started/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/get_started/sg_execution_times.rst.txt
@@ -5,8 +5,9 @@
 
 Computation times
 =================
-**00:16.435** total execution time for **tutorials_get_started** files:
+**00:14.841** total execution time for **tutorials_get_started** files:
 
-- **00:15.950**: :ref:`sphx_glr_tutorials_get_started_relay_quick_start.py` (``relay_quick_start.py``)
-- **00:00.349**: :ref:`sphx_glr_tutorials_get_started_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
-- **00:00.136**: :ref:`sphx_glr_tutorials_get_started_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
+- **00:14.193**: :ref:`sphx_glr_tutorials_get_started_relay_quick_start.py` (``relay_quick_start.py``)
+- **00:00.389**: :ref:`sphx_glr_tutorials_get_started_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)
+- **00:00.173**: :ref:`sphx_glr_tutorials_get_started_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``)
+- **00:00.087**: :ref:`sphx_glr_tutorials_get_started_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)
diff --git a/docs/_sources/tutorials/get_started/tensor_expr_get_started.rst.txt b/docs/_sources/tutorials/get_started/tensor_expr_get_started.rst.txt
index b155468..6e36e63 100644
--- a/docs/_sources/tutorials/get_started/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorials/get_started/tensor_expr_get_started.rst.txt
@@ -325,7 +325,7 @@ The following code first performs the following steps:
 
  .. code-block:: none
 
-    ['myadd.tvm_meta.json', 'myadd.ptx', 'myadd.so', 'myadd.o']
+    ['myadd.tvm_meta.json', 'myadd.so', 'myadd.ptx', 'myadd.o']
 
 
 
diff --git a/docs/_sources/tutorials/get_started/tvmc_command_line_driver.rst.txt b/docs/_sources/tutorials/get_started/tvmc_command_line_driver.rst.txt
new file mode 100644
index 0000000..aeda5a1
--- /dev/null
+++ b/docs/_sources/tutorials/get_started/tvmc_command_line_driver.rst.txt
@@ -0,0 +1,371 @@
+.. note::
+    :class: sphx-glr-download-link-note
+
+    Click :ref:`here <sphx_glr_download_tutorials_get_started_tvmc_command_line_driver.py>` to download the full example code
+.. rst-class:: sphx-glr-example-title
+
+.. _sphx_glr_tutorials_get_started_tvmc_command_line_driver.py:
+
+
+Getting Started with TVM command line driver - TVMC
+===================================================
+**Authors**:
+`Leandro Nunes <https://github.com/leandron>`_,
+`Matthew Barrett <https://github.com/mbaret>`_
+
+This tutorial is an introduction to working with TVMC, the TVM command
+line driver. TVMC is a tool that exposes TVM features such as
+auto-tuning, compiling, profiling and execution of models, via a
+command line interface.
+
+In this tutorial we are going to use TVMC to compile, run and tune a
+ResNet-50 on an x86 CPU.
+
+We are going to start by downloading ResNet 50 V2. Then, we are going
+to use TVMC to compile this model into a TVM module, and use the
+compiled module to generate predictions. Finally, we are going to experiment
+with the auto-tuning options, that can be used to help the compiler to
+improve network performance.
+
+The final goal is to give an overview of TVMC's capabilities and also
+some guidance on where to look for more information.
+
+Using TVMC
+----------
+
+TVMC is a Python application, part of the TVM Python package.
+When you install TVM using a Python package, you will get TVMC
+as a command line application called ``tvmc``.
+
+Alternatively, if you have TVM as a Python module on your
+``$PYTHONPATH``, you can access the command line driver functionality
+via the executable python module, ``python -m tvm.driver.tvmc``.
+
+For simplicity, this tutorial will mention TVMC command line using
+``tvmc <options>``, but the same results can be obtained with
+``python -m tvm.driver.tvmc <options>``.
+
+You can check the help page using:
+
+.. code-block:: bash
+
+  tvmc --help
+
+
+As you can see in the help page, the main features are
+accessible via the subcommands ``tune``, ``compile`` and ``run``.
+To read about specific options under a given subcommand, use
+``tvmc <subcommand> --help``.
+
+In the following sections we will use TVMC to tune, compile and
+run a model. But first, we need a model.
+
+
+Obtaining the model
+-------------------
+
+We are going to use ResNet-50 V2 as an example to experiment with TVMC.
+The version below is in ONNX format. To download the file, you can use
+the command below:
+
+.. code-block:: bash
+
+  wget https://github.com/onnx/models/raw/master/vision/classification/resnet/model/resnet50-v2-7.onnx
+
+
+
+.. note:: Supported model formats
+
+  TVMC supports models created with Keras, ONNX, TensorFlow, TFLite
+  and Torch. Use the option ``--model-format`` if you need to
+  explicitly provide the model format you are using. See ``tvmc
+  compile --help`` for more information.
+
+
+Compiling the model
+-------------------
+
+The next step, once we've downloaded ResNet-50, is to compile it.
+To accomplish that, we are going to use ``tvmc compile``. The
+output we get from the compilation process is a TAR package,
+that can be used to run our model on the target device.
+
+.. code-block:: bash
+
+  tvmc compile \
+    --target "llvm" \
+    --output compiled_module.tar \
+    resnet50-v2-7.onnx
+
+Once compilation finishes, the output ``compiled_module.tar`` will be created. This
+can be directly loaded by your application and run via the TVM runtime APIs.
+
+
+.. note:: Defining the correct target
+
+  Specifying the correct target (option ``--target``) can have a huge
+  impact on the performance of the compiled module, as it can take
+  advantage of hardware features available on the target. For more
+  information, please refer to `Auto-tuning a convolutional network
+  for x86 CPU <https://tvm.apache.org/docs/tutorials/autotvm/tune_relay_x86.html#define-network>`_.
+
+
+In the next step, we are going to use the compiled module, providing it
+with some inputs, to generate some predictions.
+
+
+Input pre-processing
+--------------------
+
+In order to generate predictions, we will need two things:
+
+- the compiled module, which we just produced;
+- a valid input to the model
+
+Each model is particular when it comes to expected tensor shapes, formats and data
+types. For this reason, most models require some pre and
+post processing, to ensure the input(s) is valid and to interpret the output(s).
+
+In TVMC, we adopted NumPy's ``.npz`` format for both input and output data.
+This is a well-supported NumPy format to serialize multiple arrays into a file.
+
+We will use the usual cat image, similar to other TVM tutorials:
+
+.. image:: https://s3.amazonaws.com/model-server/inputs/kitten.jpg
+   :height: 224px
+   :width: 224px
+   :align: center
+
+For our ResNet 50 V2 model, the input is expected to be in ImageNet format.
+Here is an example of a script to pre-process an image for ResNet 50 V2.
+
+
+
+.. code-block:: default
+
+    from tvm.contrib.download import download_testdata
+    from PIL import Image
+    import numpy as np
+
+    img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
+    img_path = download_testdata(img_url, "imagenet_cat.png", module="data")
+
+    # Resize it to 224x224
+    resized_image = Image.open(img_path).resize((224, 224))
+    img_data = np.asarray(resized_image).astype("float32")
+
+    # ONNX expects NCHW input, so convert the array
+    img_data = np.transpose(img_data, (2, 0, 1))
+
+    # Normalize according to ImageNet
+    imagenet_mean = np.array([0.485, 0.456, 0.406])
+    imagenet_stddev = np.array([0.229, 0.224, 0.225])
+    norm_img_data = np.zeros(img_data.shape).astype("float32")
+    for i in range(img_data.shape[0]):
+        norm_img_data[i, :, :] = (img_data[i, :, :] / 255 - imagenet_mean[i]) / imagenet_stddev[i]
+
+    # Add batch dimension
+    img_data = np.expand_dims(norm_img_data, axis=0)
+
+    # Save to .npz (outputs imagenet_cat.npz)
+    np.savez("imagenet_cat", data=img_data)
+
+
+
+
+
+
+.. rst-class:: sphx-glr-script-out
+
+ Out:
+
+ .. code-block:: none
+
+    File /workspace/.tvm_test_data/data/imagenet_cat.png exists, skip.
+
+
+
+Running the compiled module
+---------------------------
+
+With both the compiled module and input file in hand, we can run it by
+invoking ``tvmc run``.
+
+.. code-block:: bash
+
+   tvmc run \
+     --inputs imagenet_cat.npz \
+     --output predictions.npz \
+     compiled_module.tar
+
+When running the above command, a new file ``predictions.npz`` should
+be produced. It contains the output tensors.
+
+In this example, we are running the model on the same machine that we used
+for compilation. In some cases we might want to run it remotely via
+an RPC Tracker. To read more about these options please check ``tvmc
+run --help``.
+
+
+Output post-processing
+----------------------
+
+As previously mentioned, each model will have its own particular way
+of providing output tensors.
+
+In our case, we need to run some post-processing to render the
+outputs from ResNet 50 V2 into a more human-readable form.
+
+The script below shows an example of the post-processing to extract
+labels from the output of our compiled module.
+
+
+
+.. code-block:: default
+
+    import os.path
+    import numpy as np
+
+    from scipy.special import softmax
+
+    from tvm.contrib.download import download_testdata
+
+    # Download a list of labels
+    labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
+    labels_path = download_testdata(labels_url, "synset.txt", module="data")
+
+    with open(labels_path, "r") as f:
+        labels = [l.rstrip() for l in f]
+
+    output_file = "predictions.npz"
+
+    # Open the output and read the output tensor
+    if os.path.exists(output_file):
+        with np.load(output_file) as data:
+            scores = softmax(data["output_0"])
+            scores = np.squeeze(scores)
+            scores = np.argsort(scores)[::-1]
+
+            for i in scores[0:5]:
+                print("class='%s' with probability=%f" % (labels[i], scores[i]))
+
+
+
+
+
+
+.. rst-class:: sphx-glr-script-out
+
+ Out:
+
+ .. code-block:: none
+
+    File /workspace/.tvm_test_data/data/synset.txt exists, skip.
+
+
+
+When running the script, a list of predictions should be printed, similar
+to the example below.
+
+.. code-block:: bash
+
+  $ python post_processing.py
+  class=n02123045 tabby, tabby cat ; probability=446.000000
+  class=n02123159 tiger cat ; probability=675.000000
+  class=n02124075 Egyptian cat ; probability=836.000000
+  class=n02129604 tiger, Panthera tigris ; probability=917.000000
+  class=n04040759 radiator ; probability=213.000000
+
+
+Tuning the model
+----------------
+
+In some cases, we might not get the expected performance when running
+inferences using our compiled module. In cases like this, we can make use
+of the auto-tuner to find a better configuration for our model and
+get a boost in performance.
+
+Tuning in TVM refers to the process by which a model is optimized
+to run faster on a given target. This differs from training or
+fine-tuning in that it does not affect the accuracy of the model,
+but only the runtime performance.
+
+As part of the tuning process, TVM will try running many different
+operator implementation variants to see which perform best. The
+results of these runs are stored in a tuning records file, which is
+ultimately the output of the ``tune`` subcommand.
+
+In the simplest form, tuning requires you to provide three things:
+
+- the target specification of the device you intend to run this model on;
+- the path to an output file in which the tuning records will be stored, and finally,
+- a path to the model to be tuned.
+
+
+The example below demonstrates how that works in practice:
+
+.. code-block:: bash
+
+  tvmc tune \
+    --target "llvm" \
+    --output autotuner_records.json \
+    resnet50-v2-7.onnx
+
+
+Tuning sessions can take a long time, so ``tvmc tune`` offers many options to
+customize your tuning process, in terms of number of repetitions (``--repeat`` and
+``--number``, for example), the tuning algorithm to be used, and so on.
+Check ``tvmc tune --help`` for more information.
+
+As an output of the tuning process above, we obtained the tuning records stored
+in ``autotuner_records.json``. This file can be used in two ways:
+
+- as an input to further tuning (via ``tvmc tune --tuning-records``), or
+- as an input to the compiler
+
+The compiler will use the results to generate high performance code for the model
+on your specified target. To do that we can use ``tvmc compile --tuning-records``.
+Check ``tvmc compile --help`` for more information.
+
+
+Final Remarks
+-------------
+
+In this tutorial, we presented TVMC, a command line driver for TVM.
+We demonstrated how to compile, run and tune a model, and
+discussed the need for pre- and post-processing of inputs and outputs.
+
+Here we presented a simple example using ResNet 50 V2 locally. However, TVMC
+supports many more features including cross-compilation, remote execution and
+profiling/benchmarking.
+
+To see what other options are available, please have a look at ``tvmc --help``.
+
+
+
+.. _sphx_glr_download_tutorials_get_started_tvmc_command_line_driver.py:
+
+
+.. only :: html
+
+ .. container:: sphx-glr-footer
+    :class: sphx-glr-footer-example
+
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Python source code: tvmc_command_line_driver.py <tvmc_command_line_driver.py>`
+
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Jupyter notebook: tvmc_command_line_driver.ipynb <tvmc_command_line_driver.ipynb>`
+
+
+.. only:: html
+
+ .. rst-class:: sphx-glr-signature
+
+    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_
diff --git a/docs/_sources/tutorials/index.rst.txt b/docs/_sources/tutorials/index.rst.txt
index 340d7c6..c38f5cb 100644
--- a/docs/_sources/tutorials/index.rst.txt
+++ b/docs/_sources/tutorials/index.rst.txt
@@ -22,6 +22,26 @@ Get Started Tutorials
 
 .. raw:: html
 
+    <div class="sphx-glr-thumbcontainer" tooltip="This tutorial is an introduction to working with TVMC, the TVM command line driver. TVMC is a t...">
+
+.. only:: html
+
+    .. figure:: /tutorials/get_started/images/thumb/sphx_glr_tvmc_command_line_driver_thumb.png
+
+        :ref:`sphx_glr_tutorials_get_started_tvmc_command_line_driver.py`
+
+.. raw:: html
+
+    </div>
+
+
+.. toctree::
+   :hidden:
+
+   /tutorials/get_started/tvmc_command_line_driver
+
+.. raw:: html
+
     <div class="sphx-glr-thumbcontainer" tooltip="This example shows how to build a neural network with Relay python frontend and generates a run...">
 
 .. only:: html
diff --git a/docs/_sources/tutorials/language/scan.rst.txt b/docs/_sources/tutorials/language/scan.rst.txt
index a7d2716..29a8608 100644
--- a/docs/_sources/tutorials/language/scan.rst.txt
+++ b/docs/_sources/tutorials/language/scan.rst.txt
@@ -267,22 +267,22 @@ The following example demonstrates how we can build recurrence with two states.
 
     primfn(X_1: handle, scan.v0_1: handle, scan.v1_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
-      buffers = {scan.v0: Buffer(scan.v0_2: Pointer(float32), float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
-                 scan.v1: Buffer(scan.v1_2: Pointer(float32), float32, [m, l: int32], [stride_2: int32, stride_3: int32], type="auto"),
+      buffers = {scan.v1: Buffer(scan.v1_2: Pointer(float32), float32, [m: int32, l: int32], [stride: int32, stride_1: int32], type="auto"),
+                 scan.v0: Buffer(scan.v0_2: Pointer(float32), float32, [m, n: int32], [stride_2: int32, stride_3: int32], type="auto"),
                  X: Buffer(X_2: Pointer(float32), float32, [m, n], [stride_4: int32, stride_5: int32], type="auto")}
       buffer_map = {X_1: X, scan.v0_1: scan.v0, scan.v1_1: scan.v1} {
       for (i: int32, 0, n) {
-        scan.v0_2[(i*stride_1)] = (float32*)X_2[(i*stride_5)]
+        scan.v0_2[(i*stride_3)] = (float32*)X_2[(i*stride_5)]
       }
       for (i_1: int32, 0, l) {
-        scan.v1_2[(i_1*stride_3)] = 0f32
+        scan.v1_2[(i_1*stride_1)] = 0f32
       }
       for (scan.idx: int32, 0, (m - 1)) {
         for (i_2: int32, 0, n) {
-          scan.v0_2[(((scan.idx + 1)*stride) + (i_2*stride_1))] = ((float32*)scan.v0_2[((scan.idx*stride) + (i_2*stride_1))] + (float32*)X_2[(((scan.idx + 1)*stride_4) + (i_2*stride_5))])
+          scan.v0_2[(((scan.idx + 1)*stride_2) + (i_2*stride_3))] = ((float32*)scan.v0_2[((scan.idx*stride_2) + (i_2*stride_3))] + (float32*)X_2[(((scan.idx + 1)*stride_4) + (i_2*stride_5))])
         }
         for (i_3: int32, 0, l) {
-          scan.v1_2[(((scan.idx + 1)*stride_2) + (i_3*stride_3))] = ((float32*)scan.v1_2[((scan.idx*stride_2) + (i_3*stride_3))] + (float32*)scan.v0_2[(scan.idx*stride)])
+          scan.v1_2[(((scan.idx + 1)*stride) + (i_3*stride_1))] = ((float32*)scan.v1_2[((scan.idx*stride) + (i_3*stride_1))] + (float32*)scan.v0_2[(scan.idx*stride_2)])
         }
       }
     }
diff --git a/docs/_sources/tutorials/language/schedule_primitives.rst.txt b/docs/_sources/tutorials/language/schedule_primitives.rst.txt
index 7520112..b5102ed 100644
--- a/docs/_sources/tutorials/language/schedule_primitives.rst.txt
+++ b/docs/_sources/tutorials/language/schedule_primitives.rst.txt
@@ -492,12 +492,12 @@ tensor is required.
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
-      buffers = {C: Buffer(C_2: Pointer(float32), float32, [m: int32], [stride: int32], type="auto"),
-                 B: Buffer(B_2: Pointer(float32), float32, [m], [stride_1: int32], type="auto"),
+      buffers = {B: Buffer(B_2: Pointer(float32), float32, [m: int32], [stride: int32], type="auto"),
+                 C: Buffer(C_2: Pointer(float32), float32, [m], [stride_1: int32], type="auto"),
                  A: Buffer(A_2: Pointer(float32), float32, [m], [stride_2: int32], type="auto")}
       buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (i: int32, 0, m) {
-        C_2[(i*stride)] = (((float32*)A_2[(i*stride_2)] + 1f32)*2f32)
+        C_2[(i*stride_1)] = (((float32*)A_2[(i*stride_2)] + 1f32)*2f32)
       }
     }
 
diff --git a/docs/_sources/tutorials/language/sg_execution_times.rst.txt b/docs/_sources/tutorials/language/sg_execution_times.rst.txt
index cb4a145..d75bf38 100644
--- a/docs/_sources/tutorials/language/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/language/sg_execution_times.rst.txt
@@ -5,13 +5,13 @@
 
 Computation times
 =================
-**00:04.717** total execution time for **tutorials_language** files:
+**00:04.515** total execution time for **tutorials_language** files:
 
-- **00:01.823**: :ref:`sphx_glr_tutorials_language_intrin_math.py` (``intrin_math.py``)
-- **00:00.869**: :ref:`sphx_glr_tutorials_language_tensorize.py` (``tensorize.py``)
-- **00:00.627**: :ref:`sphx_glr_tutorials_language_scan.py` (``scan.py``)
-- **00:00.600**: :ref:`sphx_glr_tutorials_language_reduction.py` (``reduction.py``)
-- **00:00.249**: :ref:`sphx_glr_tutorials_language_extern_op.py` (``extern_op.py``)
-- **00:00.210**: :ref:`sphx_glr_tutorials_language_schedule_primitives.py` (``schedule_primitives.py``)
-- **00:00.185**: :ref:`sphx_glr_tutorials_language_tedd.py` (``tedd.py``)
-- **00:00.153**: :ref:`sphx_glr_tutorials_language_tuple_inputs.py` (``tuple_inputs.py``)
+- **00:01.736**: :ref:`sphx_glr_tutorials_language_intrin_math.py` (``intrin_math.py``)
+- **00:00.883**: :ref:`sphx_glr_tutorials_language_tensorize.py` (``tensorize.py``)
+- **00:00.602**: :ref:`sphx_glr_tutorials_language_reduction.py` (``reduction.py``)
+- **00:00.579**: :ref:`sphx_glr_tutorials_language_scan.py` (``scan.py``)
+- **00:00.234**: :ref:`sphx_glr_tutorials_language_extern_op.py` (``extern_op.py``)
+- **00:00.171**: :ref:`sphx_glr_tutorials_language_tedd.py` (``tedd.py``)
+- **00:00.165**: :ref:`sphx_glr_tutorials_language_schedule_primitives.py` (``schedule_primitives.py``)
+- **00:00.147**: :ref:`sphx_glr_tutorials_language_tuple_inputs.py` (``tuple_inputs.py``)
diff --git a/docs/_sources/tutorials/language/tensorize.rst.txt b/docs/_sources/tutorials/language/tensorize.rst.txt
index 5e78c99..8a03a3e 100644
--- a/docs/_sources/tutorials/language/tensorize.rst.txt
+++ b/docs/_sources/tutorials/language/tensorize.rst.txt
@@ -119,8 +119,8 @@ Thus we break down the matmul loops to make the innermost loops a (16x64) GEMV.
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
-      buffers = {C: Buffer(C_2: Pointer(float32), float32, [1024, 512], []),
-                 B: Buffer(B_2: Pointer(float32), float32, [512, 64], []),
+      buffers = {B: Buffer(B_2: Pointer(float32), float32, [512, 64], []),
+                 C: Buffer(C_2: Pointer(float32), float32, [1024, 512], []),
                  A: Buffer(A_2: Pointer(float32), float32, [1024, 64], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C} {
       for (i: int32, 0, 1024) {
@@ -308,12 +308,12 @@ The importing needs to happen before the tensorized GEMV being executed.
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
-      buffers = {B: Buffer(B_2: Pointer(float32), float32, [512, 64], []),
-                 C: Buffer(C_2: Pointer(float32), float32, [1024, 512], []),
+      buffers = {C: Buffer(C_2: Pointer(float32), float32, [1024, 512], []),
+                 B: Buffer(B_2: Pointer(float32), float32, [512, 64], []),
                  A: Buffer(A_2: Pointer(float32), float32, [1024, 64], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpgmvvkfpu/input0.cc'
-    source_filename = "/tmp/tmpgmvvkfpu/input0.cc"
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpxchcw7gw/input0.cc'
+    source_filename = "/tmp/tmpxchcw7gw/input0.cc"
     target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
     target triple = "x86_64-pc-linux-gnu"
 
diff --git a/docs/_sources/tutorials/language/tuple_inputs.rst.txt b/docs/_sources/tutorials/language/tuple_inputs.rst.txt
index 9289c63..2e75b9d 100644
--- a/docs/_sources/tutorials/language/tuple_inputs.rst.txt
+++ b/docs/_sources/tutorials/language/tuple_inputs.rst.txt
@@ -65,14 +65,14 @@ together in the next schedule procedure.
     primfn(A0_1: handle, A1_1: handle, B.v0_1: handle, B.v1_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
       buffers = {B.v1: Buffer(B.v1_2: Pointer(float32), float32, [m: int32, n: int32], [stride: int32, stride_1: int32], type="auto"),
-                 A1: Buffer(A1_2: Pointer(float32), float32, [m, n], [stride_2: int32, stride_3: int32], type="auto"),
-                 B.v0: Buffer(B.v0_2: Pointer(float32), float32, [m, n], [stride_4: int32, stride_5: int32], type="auto"),
+                 B.v0: Buffer(B.v0_2: Pointer(float32), float32, [m, n], [stride_2: int32, stride_3: int32], type="auto"),
+                 A1: Buffer(A1_2: Pointer(float32), float32, [m, n], [stride_4: int32, stride_5: int32], type="auto"),
                  A0: Buffer(A0_2: Pointer(float32), float32, [m, n], [stride_6: int32, stride_7: int32], type="auto")}
       buffer_map = {A0_1: A0, A1_1: A1, B.v0_1: B.v0, B.v1_1: B.v1} {
       for (i: int32, 0, m) {
         for (j: int32, 0, n) {
-          B.v0_2[((i*stride_4) + (j*stride_5))] = ((float32*)A0_2[((i*stride_6) + (j*stride_7))] + 2f32)
-          B.v1_2[((i*stride) + (j*stride_1))] = ((float32*)A1_2[((i*stride_2) + (j*stride_3))]*3f32)
+          B.v0_2[((i*stride_2) + (j*stride_3))] = ((float32*)A0_2[((i*stride_6) + (j*stride_7))] + 2f32)
+          B.v1_2[((i*stride) + (j*stride_1))] = ((float32*)A1_2[((i*stride_4) + (j*stride_5))]*3f32)
         }
       }
     }
diff --git a/docs/_sources/tutorials/micro/sg_execution_times.rst.txt b/docs/_sources/tutorials/micro/sg_execution_times.rst.txt
index 1dcb55d..faaa95c 100644
--- a/docs/_sources/tutorials/micro/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/micro/sg_execution_times.rst.txt
@@ -5,6 +5,6 @@
 
 Computation times
 =================
-**00:10.279** total execution time for **tutorials_micro** files:
+**00:11.397** total execution time for **tutorials_micro** files:
 
-- **00:10.279**: :ref:`sphx_glr_tutorials_micro_micro_tflite.py` (``micro_tflite.py``)
+- **00:11.397**: :ref:`sphx_glr_tutorials_micro_micro_tflite.py` (``micro_tflite.py``)
diff --git a/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt b/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt
index 97ded75..cde595f 100644
--- a/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt
+++ b/docs/_sources/tutorials/optimize/opt_conv_cuda.rst.txt
@@ -296,7 +296,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 40.052619 ms
+    Convolution: 19.803000 ms
 
 
 
diff --git a/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt b/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt
index 1e8b2b6..9406154 100644
--- a/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/tutorials/optimize/opt_conv_tensorcore.rst.txt
@@ -624,7 +624,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 13.376364 ms
+    conv2d with tensor core: 6.651737 ms
 
 
 
diff --git a/docs/_sources/tutorials/optimize/opt_gemm.rst.txt b/docs/_sources/tutorials/optimize/opt_gemm.rst.txt
index 50bf4cc..abb312f 100644
--- a/docs/_sources/tutorials/optimize/opt_gemm.rst.txt
+++ b/docs/_sources/tutorials/optimize/opt_gemm.rst.txt
@@ -118,8 +118,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.009183
-    Baseline: 3.414023
+    Numpy running time: 0.006994
+    Baseline: 6.085453
 
 
 
@@ -206,7 +206,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.283587
+    Opt1: 0.135239
 
 
 
@@ -300,7 +300,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.318179
+    Opt2: 0.210036
 
 
 
@@ -389,7 +389,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.111974
+    Opt3: 0.106779
 
 
 
@@ -499,7 +499,7 @@ the corresponding value from the packed array.
 
  .. code-block:: none
 
-    Opt4: 0.162749
+    Opt4: 0.103652
 
 
 
@@ -609,7 +609,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.106645
+    Opt5: 0.112343
 
 
 
@@ -725,7 +725,7 @@ Futhermore, we can also utilize multi-core processors to do the thread-level par
 
  .. code-block:: none
 
-    Opt6: 0.034892
+    Opt6: 0.033847
 
 
 
@@ -749,8 +749,8 @@ Here is the generated IR after parallelization.
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
-      buffers = {C: Buffer(C_2: Pointer(float32), float32, [1024, 1024], []),
-                 B: Buffer(B_2: Pointer(float32), float32, [1024, 1024], []),
+      buffers = {B: Buffer(B_2: Pointer(float32), float32, [1024, 1024], []),
+                 C: Buffer(C_2: Pointer(float32), float32, [1024, 1024], []),
                  A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C} {
       attr [packedB: Pointer(float32)] "storage_scope" = "global";
diff --git a/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt
index fc6677f..658c912 100644
--- a/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,9 +5,9 @@
 
 Computation times
 =================
-**00:28.259** total execution time for **tutorials_optimize** files:
+**00:32.028** total execution time for **tutorials_optimize** files:
 
-- **00:25.834**: :ref:`sphx_glr_tutorials_optimize_opt_gemm.py` (``opt_gemm.py``)
-- **00:01.231**: :ref:`sphx_glr_tutorials_optimize_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
-- **00:01.004**: :ref:`sphx_glr_tutorials_optimize_opt_conv_cuda.py` (``opt_conv_cuda.py``)
-- **00:00.190**: :ref:`sphx_glr_tutorials_optimize_opt_matmul_auto_tensorcore.py` (``opt_matmul_auto_tensorcore.py``)
+- **00:29.541**: :ref:`sphx_glr_tutorials_optimize_opt_gemm.py` (``opt_gemm.py``)
+- **00:01.408**: :ref:`sphx_glr_tutorials_optimize_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``)
+- **00:00.886**: :ref:`sphx_glr_tutorials_optimize_opt_conv_cuda.py` (``opt_conv_cuda.py``)
+- **00:00.192**: :ref:`sphx_glr_tutorials_optimize_opt_matmul_auto_tensorcore.py` (``opt_matmul_auto_tensorcore.py``)
diff --git a/docs/_sources/tutorials/topi/intro_topi.rst.txt b/docs/_sources/tutorials/topi/intro_topi.rst.txt
index 7c437c1..72062c6 100644
--- a/docs/_sources/tutorials/topi/intro_topi.rst.txt
+++ b/docs/_sources/tutorials/topi/intro_topi.rst.txt
@@ -231,7 +231,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, 0x1c395a180), stage(b, 0xc0aadd40), stage(T_add, 0x879c6340), stage(T_multiply, 0xcc4a6aa0), stage(T_elemwise_sum, 0xb71ac290), stage(T_divide, 0xccda2200), stage(T_divide_red.rf, 0xcf98ac90), stage(T_divide_red, 0x13964e540)]
+    [stage(a, 0xaf4eb8e0), stage(b, 0xbdf81e40), stage(T_add, 0x9fdc1860), stage(T_multiply, 0xab292130), stage(T_elemwise_sum, 0xba874b10), stage(T_divide, 0x97b0cd80), stage(T_divide_red.rf, 0x118673d60), stage(T_divide_red, 0x123c6ad80)]
 
 
 
diff --git a/docs/_sources/tutorials/topi/sg_execution_times.rst.txt b/docs/_sources/tutorials/topi/sg_execution_times.rst.txt
index 8b59ca8..1b263f3 100644
--- a/docs/_sources/tutorials/topi/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorials/topi/sg_execution_times.rst.txt
@@ -5,6 +5,6 @@
 
 Computation times
 =================
-**00:00.698** total execution time for **tutorials_topi** files:
+**00:00.688** total execution time for **tutorials_topi** files:
 
-- **00:00.698**: :ref:`sphx_glr_tutorials_topi_intro_topi.py` (``intro_topi.py``)
+- **00:00.688**: :ref:`sphx_glr_tutorials_topi_intro_topi.py` (``intro_topi.py``)
diff --git a/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt
index eec2662..cf1d017 100644
--- a/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,6 +5,6 @@
 
 Computation times
 =================
-**00:07.664** total execution time for **vta_tutorials_autotvm** files:
+**00:06.543** total execution time for **vta_tutorials_autotvm** files:
 
-- **00:07.664**: :ref:`sphx_glr_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
+- **00:06.543**: :ref:`sphx_glr_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``)
diff --git a/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt b/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt
index c7c9ddd..2217a64 100644
--- a/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt
+++ b/docs/_sources/vta/tutorials/autotvm/tune_relay_vta.rst.txt
@@ -497,7 +497,7 @@ Finally, we launch tuning jobs and evaluate the end-to-end performance.
  .. code-block:: none
 
     Extract tasks...
-
    ...1%, 0.01 MB, 26 KB/s, 0 seconds passed
    ...2%, 0.02 MB, 51 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 77 KB/s, 0 seconds passed
    ...4%, 0.03 MB, 102 KB/s, 0 seconds passed
    ...5%, 0.04 MB, 127 KB/s, 0 seconds passed
    ...6%, 0.05 MB, 149 KB/s, 0 seconds passed
    ...7%, 0.05 MB, 174 KB/s, 0 seconds passed
    ...8%, 0.06 MB, 198 KB/s, 0 seconds passed
    ...9%, 0.07 MB, 222 KB/s, 0 seconds passed
    ...10%, 0.08 MB, 247 KB/s, 0 seconds passed
    ...11%, 0.09 MB, 271 KB/s, 0 seconds passed
    ...13%, 0.09 MB, 291 KB/s, 0 seconds passed
    ...14%, 0.10 MB, 316 KB/s, 0 seconds passed
    ...15%, 0.11 MB, 339 KB/s, 0 seconds passed
    ...16%, 0.12 MB, 363 KB/s, 0 seconds passed
    ...17%, 0.12 MB, 386 KB/s, 0 seconds passed
    ...18%, 0.13 MB, 410 KB/s, 0 seconds passed
    ...19%, 0.14 MB, 433 KB/s, 0 seconds passed
    ...20%, 0.15 MB, 457 KB/s, 0 seconds passed
    ...21%, 0.16 MB, 480 KB/s, 0 seconds passed
    ...22%, 0.16 MB, 504 KB/s, 0 seconds passed
     ...23%, 0.17 MB, 528 KB/s, 0 seconds passed
    ...25%, 0.18 MB, 551 KB/s, 0 seconds passed
    ...26%, 0.19 MB, 574 KB/s, 0 seconds passed
    ...27%, 0.20 MB, 597 KB/s, 0 seconds passed
    ...28%, 0.20 MB, 615 KB/s, 0 seconds passed
    ...29%, 0.21 MB, 637 KB/s, 0 seconds passed
    ...30%, 0.22 MB, 661 KB/s, 0 seconds passed
    ...31%, 0.23 MB, 684 KB/s, 0 seconds passed
    ...32%, 0.23 MB, 708 KB/s, 0 seconds passed
    ...33%, 0.24 MB, 729 KB/s, 0 seconds passed
    ...34%, 0.25 MB, 753 KB/s, 0 seconds passed
    ...35%, 0.26 MB, 776 KB/s, 0 seconds passed
    ...36%, 0.27 MB, 799 KB/s, 0 seconds passed
    ...38%, 0.27 MB, 821 KB/s, 0 seconds passed
    ...39%, 0.28 MB, 844 KB/s, 0 seconds passed
    ...40%, 0.29 MB, 868 KB/s, 0 seconds passed
    ...41%, 0.30 MB, 891 KB/s, 0 seconds passed
    ...42%, 0.30 MB, 910 KB/s, 0 seconds passed
    ...43%, 0.31 MB, 933 KB/s, 0 seconds passed
    ...44%, 0.32 MB, 954 KB/s, 0 seconds passed
    ...45%, 0.33 MB, 977 KB/s, 0 seco
 nds passed
    ...46%, 0.34 MB, 1000 KB/s, 0 seconds passed
    ...47%, 0.34 MB, 1023 KB/s, 0 seconds passed
    ...48%, 0.35 MB, 1044 KB/s, 0 seconds passed
    ...50%, 0.36 MB, 1067 KB/s, 0 seconds passed
    ...51%, 0.37 MB, 1087 KB/s, 0 seconds passed
    ...52%, 0.38 MB, 1110 KB/s, 0 seconds passed
    ...53%, 0.38 MB, 1133 KB/s, 0 seconds passed
    ...54%, 0.39 MB, 1156 KB/s, 0 seconds passed
    ...55%, 0.40 MB, 1174 KB/s, 0 seconds passed
    ...56%, 0.41 MB, 1197 KB/s, 0 seconds passed
    ...57%, 0.41 MB, 1218 KB/s, 0 seconds passed
    ...58%, 0.42 MB, 1241 KB/s, 0 seconds passed
    ...59%, 0.43 MB, 1261 KB/s, 0 seconds passed
    ...60%, 0.44 MB, 1284 KB/s, 0 seconds passed
    ...62%, 0.45 MB, 1306 KB/s, 0 seconds passed
    ...63%, 0.45 MB, 1329 KB/s, 0 seconds passed
    ...64%, 0.46 MB, 1349 KB/s, 0 seconds passed
    ...65%, 0.47 MB, 1371 KB/s, 0 seconds passed
    ...66%, 0.48 MB, 1394 KB/s, 0 seconds passed
    ...67%, 0.48 MB, 1417 KB/s, 0 seconds passed
    ..
 .68%, 0.49 MB, 1432 KB/s, 0 seconds passed
    ...69%, 0.50 MB, 1454 KB/s, 0 seconds passed
    ...70%, 0.51 MB, 1477 KB/s, 0 seconds passed
    ...71%, 0.52 MB, 1499 KB/s, 0 seconds passed
    ...72%, 0.52 MB, 1519 KB/s, 0 seconds passed
    ...73%, 0.53 MB, 1541 KB/s, 0 seconds passed
    ...75%, 0.54 MB, 1564 KB/s, 0 seconds passed
    ...76%, 0.55 MB, 1586 KB/s, 0 seconds passed
    ...77%, 0.55 MB, 1601 KB/s, 0 seconds passed
    ...78%, 0.56 MB, 1623 KB/s, 0 seconds passed
    ...79%, 0.57 MB, 1645 KB/s, 0 seconds passed
    ...80%, 0.58 MB, 1667 KB/s, 0 seconds passed
    ...81%, 0.59 MB, 1684 KB/s, 0 seconds passed
    ...82%, 0.59 MB, 1706 KB/s, 0 seconds passed
    ...83%, 0.60 MB, 1726 KB/s, 0 seconds passed
    ...84%, 0.61 MB, 1748 KB/s, 0 seconds passed
    ...85%, 0.62 MB, 1767 KB/s, 0 seconds passed
    ...87%, 0.62 MB, 1789 KB/s, 0 seconds passed
    ...88%, 0.63 MB, 1811 KB/s, 0 seconds passed
    ...89%, 0.64 MB, 1833 KB/s, 0 seconds passed
    ...90%, 0.65 MB, 18
 51 KB/s, 0 seconds passed
    ...91%, 0.66 MB, 1873 KB/s, 0 seconds passed
    ...92%, 0.66 MB, 1894 KB/s, 0 seconds passed
    ...93%, 0.67 MB, 1916 KB/s, 0 seconds passed
    ...94%, 0.68 MB, 1934 KB/s, 0 seconds passed
    ...95%, 0.69 MB, 1956 KB/s, 0 seconds passed
    ...96%, 0.70 MB, 1976 KB/s, 0 seconds passed
    ...97%, 0.70 MB, 1998 KB/s, 0 seconds passed
    ...99%, 0.71 MB, 2017 KB/s, 0 seconds passed
    ...100%, 0.72 MB, 2038 KB/s, 0 seconds passed
+
    ...1%, 0.01 MB, 579 KB/s, 0 seconds passed
    ...2%, 0.02 MB, 1105 KB/s, 0 seconds passed
    ...3%, 0.02 MB, 1637 KB/s, 0 seconds passed
    ...4%, 0.03 MB, 2156 KB/s, 0 seconds passed
    ...5%, 0.04 MB, 2567 KB/s, 0 seconds passed
    ...6%, 0.05 MB, 3002 KB/s, 0 seconds passed
    ...7%, 0.05 MB, 3463 KB/s, 0 seconds passed
    ...8%, 0.06 MB, 3869 KB/s, 0 seconds passed
    ...9%, 0.07 MB, 4203 KB/s, 0 seconds passed
    ...10%, 0.08 MB, 4617 KB/s, 0 seconds passed
    ...11%, 0.09 MB, 4972 KB/s, 0 seconds passed
    ...13%, 0.09 MB, 5254 KB/s, 0 seconds passed
    ...14%, 0.10 MB, 5631 KB/s, 0 seconds passed
    ...15%, 0.11 MB, 6009 KB/s, 0 seconds passed
    ...16%, 0.12 MB, 6361 KB/s, 0 seconds passed
    ...17%, 0.12 MB, 6576 KB/s, 0 seconds passed
    ...18%, 0.13 MB, 6927 KB/s, 0 seconds passed
    ...19%, 0.14 MB, 7156 KB/s, 0 seconds passed
    ...20%, 0.15 MB, 7492 KB/s, 0 seconds passed
    ...21%, 0.16 MB, 7799 KB/s, 0 seconds passed
    ...22%, 0.16 MB, 8115 
 KB/s, 0 seconds passed
    ...23%, 0.17 MB, 8268 KB/s, 0 seconds passed
    ...25%, 0.18 MB, 8499 KB/s, 0 seconds passed
    ...26%, 0.19 MB, 8791 KB/s, 0 seconds passed
    ...27%, 0.20 MB, 9088 KB/s, 0 seconds passed
    ...28%, 0.20 MB, 9341 KB/s, 0 seconds passed
    ...29%, 0.21 MB, 9608 KB/s, 0 seconds passed
    ...30%, 0.22 MB, 9804 KB/s, 0 seconds passed
    ...31%, 0.23 MB, 10081 KB/s, 0 seconds passed
    ...32%, 0.23 MB, 10255 KB/s, 0 seconds passed
    ...33%, 0.24 MB, 10521 KB/s, 0 seconds passed
    ...34%, 0.25 MB, 10684 KB/s, 0 seconds passed
    ...35%, 0.26 MB, 10942 KB/s, 0 seconds passed
    ...36%, 0.27 MB, 11179 KB/s, 0 seconds passed
    ...38%, 0.27 MB, 11327 KB/s, 0 seconds passed
    ...39%, 0.28 MB, 11574 KB/s, 0 seconds passed
    ...40%, 0.29 MB, 11817 KB/s, 0 seconds passed
    ...41%, 0.30 MB, 12057 KB/s, 0 seconds passed
    ...42%, 0.30 MB, 12149 KB/s, 0 seconds passed
    ...43%, 0.31 MB, 12380 KB/s, 0 seconds passed
    ...44%, 0.32 MB, 12613 KB/s
 , 0 seconds passed
    ...45%, 0.33 MB, 12840 KB/s, 0 seconds passed
    ...46%, 0.34 MB, 13064 KB/s, 0 seconds passed
    ...47%, 0.34 MB, 13273 KB/s, 0 seconds passed
    ...48%, 0.35 MB, 13391 KB/s, 0 seconds passed
    ...50%, 0.36 MB, 13603 KB/s, 0 seconds passed
    ...51%, 0.37 MB, 13814 KB/s, 0 seconds passed
    ...52%, 0.38 MB, 14023 KB/s, 0 seconds passed
    ...53%, 0.38 MB, 14238 KB/s, 0 seconds passed
    ...54%, 0.39 MB, 14321 KB/s, 0 seconds passed
    ...55%, 0.40 MB, 14536 KB/s, 0 seconds passed
    ...56%, 0.41 MB, 14709 KB/s, 0 seconds passed
    ...57%, 0.41 MB, 14874 KB/s, 0 seconds passed
    ...58%, 0.42 MB, 15083 KB/s, 0 seconds passed
    ...59%, 0.43 MB, 15242 KB/s, 0 seconds passed
    ...60%, 0.44 MB, 15413 KB/s, 0 seconds passed
    ...62%, 0.45 MB, 15618 KB/s, 0 seconds passed
    ...63%, 0.45 MB, 15763 KB/s, 0 seconds passed
    ...64%, 0.46 MB, 15934 KB/s, 0 seconds passed
    ...65%, 0.47 MB, 16133 KB/s, 0 seconds passed
    ...66%, 0.48 MB, 16249 K
 B/s, 0 seconds passed
    ...67%, 0.48 MB, 16415 KB/s, 0 seconds passed
    ...68%, 0.49 MB, 16608 KB/s, 0 seconds passed
    ...69%, 0.50 MB, 16611 KB/s, 0 seconds passed
    ...70%, 0.51 MB, 16798 KB/s, 0 seconds passed
    ...71%, 0.52 MB, 16986 KB/s, 0 seconds passed
    ...72%, 0.52 MB, 17172 KB/s, 0 seconds passed
    ...73%, 0.53 MB, 17133 KB/s, 0 seconds passed
    ...75%, 0.54 MB, 17265 KB/s, 0 seconds passed
    ...76%, 0.55 MB, 17445 KB/s, 0 seconds passed
    ...77%, 0.55 MB, 17623 KB/s, 0 seconds passed
    ...78%, 0.56 MB, 17799 KB/s, 0 seconds passed
    ...79%, 0.57 MB, 17976 KB/s, 0 seconds passed
    ...80%, 0.58 MB, 17936 KB/s, 0 seconds passed
    ...81%, 0.59 MB, 18107 KB/s, 0 seconds passed
    ...82%, 0.59 MB, 18276 KB/s, 0 seconds passed
    ...83%, 0.60 MB, 18446 KB/s, 0 seconds passed
    ...84%, 0.61 MB, 18613 KB/s, 0 seconds passed
    ...85%, 0.62 MB, 18780 KB/s, 0 seconds passed
    ...87%, 0.62 MB, 18945 KB/s, 0 seconds passed
    ...88%, 0.63 MB, 1893
 2 KB/s, 0 seconds passed
    ...89%, 0.64 MB, 19092 KB/s, 0 seconds passed
    ...90%, 0.65 MB, 19225 KB/s, 0 seconds passed
    ...91%, 0.66 MB, 19380 KB/s, 0 seconds passed
    ...92%, 0.66 MB, 19536 KB/s, 0 seconds passed
    ...93%, 0.67 MB, 19693 KB/s, 0 seconds passed
    ...94%, 0.68 MB, 19850 KB/s, 0 seconds passed
    ...95%, 0.69 MB, 20003 KB/s, 0 seconds passed
    ...96%, 0.70 MB, 20157 KB/s, 0 seconds passed
    ...97%, 0.70 MB, 20309 KB/s, 0 seconds passed
    ...99%, 0.71 MB, 20447 KB/s, 0 seconds passed
    ...100%, 0.72 MB, 20491 KB/s, 0 seconds passed
     Extracted 10 conv2d tasks:
     (1, 14, 14, 256, 512, 1, 1, 0, 0, 2, 2)
     (1, 28, 28, 128, 256, 1, 1, 0, 0, 2, 2)
diff --git a/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt
index 575dd4c..20284d0 100644
--- a/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -243,8 +243,8 @@ The compilation steps are:
 
  .. code-block:: none
 
-
    ...12%, 0.01 MB, 40 KB/s, 0 seconds passed
    ...25%, 0.02 MB, 78 KB/s, 0 seconds passed
    ...38%, 0.02 MB, 117 KB/s, 0 seconds passed
    ...51%, 0.03 MB, 156 KB/s, 0 seconds passed
    ...64%, 0.04 MB, 193 KB/s, 0 seconds passed
    ...77%, 0.05 MB, 226 KB/s, 0 seconds passed
    ...89%, 0.05 MB, 263 KB/s, 0 seconds passed
    ...100%, 0.06 MB, 300 KB/s, 0 seconds passed
-    resnet18_v1 inference graph built in 9.11s!
+
    ...12%, 0.01 MB, 446 KB/s, 0 seconds passed
    ...25%, 0.02 MB, 861 KB/s, 0 seconds passed
    ...38%, 0.02 MB, 1266 KB/s, 0 seconds passed
    ...51%, 0.03 MB, 1677 KB/s, 0 seconds passed
    ...64%, 0.04 MB, 2049 KB/s, 0 seconds passed
    ...77%, 0.05 MB, 2387 KB/s, 0 seconds passed
    ...89%, 0.05 MB, 2758 KB/s, 0 seconds passed
    ...100%, 0.06 MB, 3086 KB/s, 0 seconds passed
+    resnet18_v1 inference graph built in 8.23s!
 
 
 
diff --git a/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt
index ea9c104..3e6b3b3 100644
--- a/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,6 +5,6 @@
 
 Computation times
 =================
-**00:30.528** total execution time for **vta_tutorials_frontend** files:
+**00:30.503** total execution time for **vta_tutorials_frontend** files:
 
-- **00:30.528**: :ref:`sphx_glr_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
+- **00:30.503**: :ref:`sphx_glr_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``)
diff --git a/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt b/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt
index 88370f8..c6af533 100644
--- a/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt
+++ b/docs/_sources/vta/tutorials/optimize/convolution_opt.rst.txt
@@ -448,8 +448,8 @@ below.
 
     primfn(data_1: handle, kernel_1: handle, res_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
-      buffers = {res: Buffer(res_2: Pointer(int8), int8, [1, 16, 14, 14, 1, 16], []),
-                 kernel: Buffer(kernel_2: Pointer(int8), int8, [16, 16, 3, 3, 16, 16], []),
+      buffers = {kernel: Buffer(kernel_2: Pointer(int8), int8, [16, 16, 3, 3, 16, 16], []),
+                 res: Buffer(res_2: Pointer(int8), int8, [1, 16, 14, 14, 1, 16], []),
                  data: Buffer(data_2: Pointer(int8), int8, [1, 16, 14, 14, 1, 16], [])}
       buffer_map = {data_1: data, kernel_1: kernel, res_1: res} {
       attr [data_buf: Pointer(int8)] "storage_scope" = "global";
diff --git a/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt
index 998cfa6..b2706e1 100644
--- a/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:03.880** total execution time for **vta_tutorials_optimize** files:
+**00:03.587** total execution time for **vta_tutorials_optimize** files:
 
-- **00:03.302**: :ref:`sphx_glr_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
-- **00:00.578**: :ref:`sphx_glr_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
+- **00:02.976**: :ref:`sphx_glr_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)
+- **00:00.611**: :ref:`sphx_glr_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``)
diff --git a/docs/_sources/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/vta/tutorials/sg_execution_times.rst.txt
index 00c5127..9128215 100644
--- a/docs/_sources/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/vta/tutorials/sg_execution_times.rst.txt
@@ -5,7 +5,7 @@
 
 Computation times
 =================
-**00:01.063** total execution time for **vta_tutorials** files:
+**00:01.062** total execution time for **vta_tutorials** files:
 
-- **00:00.538**: :ref:`sphx_glr_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
-- **00:00.525**: :ref:`sphx_glr_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
+- **00:00.535**: :ref:`sphx_glr_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``)
+- **00:00.527**: :ref:`sphx_glr_vta_tutorials_vta_get_started.py` (``vta_get_started.py``)
diff --git a/docs/_sources/vta/tutorials/vta_get_started.rst.txt b/docs/_sources/vta/tutorials/vta_get_started.rst.txt
index 8f6368b..1f03202 100644
--- a/docs/_sources/vta/tutorials/vta_get_started.rst.txt
+++ b/docs/_sources/vta/tutorials/vta_get_started.rst.txt
@@ -300,8 +300,8 @@ After we construct the schedule, by default the schedule computes
 
     primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"global_symbol": "main", "tir.noalias": True}
-      buffers = {C: Buffer(C_2: Pointer(int8), int8, [1, 64, 1, 16], []),
-                 B: Buffer(B_2: Pointer(int32), int32, [1, 64, 1, 16], []),
+      buffers = {B: Buffer(B_2: Pointer(int32), int32, [1, 64, 1, 16], []),
+                 C: Buffer(C_2: Pointer(int8), int8, [1, 64, 1, 16], []),
                  A: Buffer(A_2: Pointer(int32), int32, [1, 64, 1, 16], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C} {
       attr [A_buf: Pointer(int32)] "storage_scope" = "global";
diff --git a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js
index 634215f..9bdd13b 100644
--- a/docs/_static/documentation_options.js
+++ b/docs/_static/documentation_options.js
@@ -1,6 +1,6 @@
 var DOCUMENTATION_OPTIONS = {
     URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
-    VERSION: '0.7.dev1',
+    VERSION: '0.7.0',
     LANGUAGE: 'None',
     COLLAPSE_INDEX: false,
     BUILDER: 'html',
diff --git a/docs/api/doxygen/attr__registry__map_8h_source.html b/docs/api/doxygen/attr__registry__map_8h_source.html
index de4112f..d4ccdcc 100644
--- a/docs/api/doxygen/attr__registry__map_8h_source.html
+++ b/docs/api/doxygen/attr__registry__map_8h_source.html
@@ -89,7 +89,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">attr_registry_map.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="attr__registry__map_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * [...]
+<a href="attr__registry__map_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * [...]
 <div class="ttc" id="classtvm_1_1AttrRegistryMap_html_aefe7933b03f49eac54bec961e75f8650"><div class="ttname"><a href="classtvm_1_1AttrRegistryMap.html#aefe7933b03f49eac54bec961e75f8650">tvm::AttrRegistryMap::AttrRegistryMap</a></div><div class="ttdeci">AttrRegistryMap(const AttrRegistryMapContainerMap&lt; KeyType &gt; &amp;map)</div><div class="ttdoc">constructor </div><div class="ttdef"><b>Definition:</b> attr_registry_map.h:107</div></div>
 <div class="ttc" id="classtvm_1_1AttrRegistryMapContainerMap_html_a43ad528c7d4ff44543447ef8f5b5bb53"><div class="ttname"><a href="classtvm_1_1AttrRegistryMapContainerMap.html#a43ad528c7d4ff44543447ef8f5b5bb53">tvm::AttrRegistryMapContainerMap::count</a></div><div class="ttdeci">int count(const KeyType &amp;key) const </div><div class="ttdoc">Check if the map has key. </div><div class="ttdef"><b>Definition:</b> attr_registry_map.h:45</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
diff --git a/docs/api/doxygen/c__runtime__api_8h.html b/docs/api/doxygen/c__runtime__api_8h.html
index acfc565..64e2aa5 100644
--- a/docs/api/doxygen/c__runtime__api_8h.html
+++ b/docs/api/doxygen/c__runtime__api_8h.html
@@ -126,7 +126,7 @@ Classes</h2></td></tr>
 Macros</h2></td></tr>
 <tr class="memitem:a4d6bcd569c115f7ae4ddc455b14ca395"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="c__runtime__api_8h.html#a4d6bcd569c115f7ae4ddc455b14ca395">TVM_WEAK</a>&#160;&#160;&#160;__attribute__((weak))</td></tr>
 <tr class="separator:a4d6bcd569c115f7ae4ddc455b14ca395"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a429515e9851601ec44c8ed421521ab6d"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="c__runtime__api_8h.html#a429515e9851601ec44c8ed421521ab6d">TVM_VERSION</a>&#160;&#160;&#160;&quot;0.7.dev1&quot;</td></tr>
+<tr class="memitem:a429515e9851601ec44c8ed421521ab6d"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="c__runtime__api_8h.html#a429515e9851601ec44c8ed421521ab6d">TVM_VERSION</a>&#160;&#160;&#160;&quot;0.7.0&quot;</td></tr>
 <tr class="separator:a429515e9851601ec44c8ed421521ab6d"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="typedef-members"></a>
@@ -322,7 +322,7 @@ Functions</h2></td></tr>
 <div class="memproto">
       <table class="memname">
         <tr>
-          <td class="memname">#define TVM_VERSION&#160;&#160;&#160;&quot;0.7.dev1&quot;</td>
+          <td class="memname">#define TVM_VERSION&#160;&#160;&#160;&quot;0.7.0&quot;</td>
         </tr>
       </table>
 </div><div class="memdoc">
diff --git a/docs/api/doxygen/c__runtime__api_8h_source.html b/docs/api/doxygen/c__runtime__api_8h_source.html
index 8edb719..0f9a979 100644
--- a/docs/api/doxygen/c__runtime__api_8h_source.html
+++ b/docs/api/doxygen/c__runtime__api_8h_source.html
@@ -89,7 +89,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">c_runtime_api.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="c__runtime__api_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or  [...]
+<a href="c__runtime__api_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or  [...]
 <div class="ttc" id="unionTVMValue_html_aa1c40fa9e74fbf97541fd9735062c4cc"><div class="ttname"><a href="unionTVMValue.html#aa1c40fa9e74fbf97541fd9735062c4cc">TVMValue::v_int64</a></div><div class="ttdeci">int64_t v_int64</div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:145</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a07954203342f2787acf988c4c351d9c3"><div class="ttname"><a href="c__runtime__api_8h.html#a07954203342f2787acf988c4c351d9c3">TVMPackedCFunc</a></div><div class="ttdeci">int(* TVMPackedCFunc)(TVMValue *args, int *type_codes, int num_args, TVMRetValueHandle ret, void *resource_handle)</div><div class="ttdoc">C type of packed function. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:305</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a477111f9accd70633dc5f2e7139b6cf4"><div class="ttname"><a href="c__runtime__api_8h.html#a477111f9accd70633dc5f2e7139b6cf4">TVMAPISetLastError</a></div><div class="ttdeci">void TVMAPISetLastError(const char *msg)</div><div class="ttdoc">Used for implementing C API function. Set last error message before return. </div></div>
diff --git a/docs/api/doxygen/classtvm_1_1runtime_1_1NDArray.html b/docs/api/doxygen/classtvm_1_1runtime_1_1NDArray.html
index be05653..3e9a3bd 100644
--- a/docs/api/doxygen/classtvm_1_1runtime_1_1NDArray.html
+++ b/docs/api/doxygen/classtvm_1_1runtime_1_1NDArray.html
@@ -452,7 +452,7 @@ Additional Inherited Members</h2></td></tr>
   </table>
   </dd>
 </dl>
-<dl class="section note"><dt>Note</dt><dd>The copy may happen asynchronously if it involves a GPU context. TVMSynchronize is necessary. </dd></dl>
+<dl class="section note"><dt>Note</dt><dd>The copy always triggers a TVMSynchronize. </dd></dl>
 
 </div>
 </div>
@@ -628,7 +628,7 @@ Additional Inherited Members</h2></td></tr>
   </table>
   </dd>
 </dl>
-<dl class="section note"><dt>Note</dt><dd>The copy may happen asynchronously if it involves a GPU context. TVMSynchronize is necessary. </dd></dl>
+<dl class="section note"><dt>Note</dt><dd>The copy always triggers a TVMSynchronize. </dd></dl>
 
 </div>
 </div>
diff --git a/docs/api/doxygen/codegen_8h_source.html b/docs/api/doxygen/codegen_8h_source.html
index e842014..d49afd8 100644
--- a/docs/api/doxygen/codegen_8h_source.html
+++ b/docs/api/doxygen/codegen_8h_source.html
@@ -89,7 +89,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">codegen.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="codegen_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more con [...]
+<a href="codegen_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more con [...]
 <div class="ttc" id="ir_2module_8h_html"><div class="ttname"><a href="ir_2module_8h.html">module.h</a></div><div class="ttdoc">IRModule that holds the functions and type definitions. </div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="crt_2packed__func_8h_html_ad869d7c5618f982f6841399c216a234c"><div class="ttname"><a href="crt_2packed__func_8h.html#ad869d7c5618f982f6841399c216a234c">TVMArgs</a></div><div class="ttdeci">struct TVMArgs TVMArgs</div></div>
diff --git a/docs/api/doxygen/device__api_8h_source.html b/docs/api/doxygen/device__api_8h_source.html
index 87c0d5c..f5c7530 100644
--- a/docs/api/doxygen/device__api_8h_source.html
+++ b/docs/api/doxygen/device__api_8h_source.html
@@ -90,7 +90,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 </div><!--header-->
 <div class="contents">
 <a href="device__api_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:557</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:559</div></div>
 <div class="ttc" id="namespacetvm_1_1runtime_html_a46fef1ca0ccc05473e9bb0a8c6b66619a69fe0643750b0c49e8b8aefb1cada337"><div class="ttname"><a href="namespacetvm_1_1runtime.html#a46fef1ca0ccc05473e9bb0a8c6b66619a69fe0643750b0c49e8b8aefb1cada337">tvm::runtime::kApiVersion</a></div><div class="ttdef"><b>Definition:</b> device_api.h:49</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a57cbccb14c35a0e62dbc1b911188fcefacdc33f5efa9ddabe89e886c28d1ff65b"><div class="ttname"><a href="c__runtime__api_8h.html#a57cbccb14c35a0e62dbc1b911188fcefacdc33f5efa9ddabe89e886c28d1ff65b">kDLSDAccel</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:81</div></div>
 <div class="ttc" id="namespacetvm_1_1runtime_html_a2f6f769f6dbbbb24929b7c9f91a48c90"><div class="ttname"><a href="namespacetvm_1_1runtime.html#a2f6f769f6dbbbb24929b7c9f91a48c90">tvm::runtime::kMaxStackAlloca</a></div><div class="ttdeci">constexpr int kMaxStackAlloca</div><div class="ttdoc">Maximum size that can be allocated on stack. </div><div class="ttdef"><b>Definition:</b> device_api.h:59</div></div>
diff --git a/docs/api/doxygen/env__func_8h_source.html b/docs/api/doxygen/env__func_8h_source.html
index 817c7d3..9fac714 100644
--- a/docs/api/doxygen/env__func_8h_source.html
+++ b/docs/api/doxygen/env__func_8h_source.html
@@ -90,7 +90,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 </div><!--header-->
 <div class="contents">
 <a href="env__func_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more c [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:557</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:559</div></div>
 <div class="ttc" id="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4_html_a2c756d14682b41d9b733673443b782f1"><div class="ttname"><a href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html#a2c756d14682b41d9b733673443b782f1">tvm::TypedEnvFunc&lt; R(Args...)&gt;::operator()</a></div><div class="ttdeci">R operator()(Args...args) const </div><div class="ttdoc">Invoke the function. </div><div class="ttdef"><b>Definition:</b> env_func.h:138</div></div>
 <div class="ttc" id="classtvm_1_1SEqualReducer_html"><div class="ttname"><a href="classtvm_1_1SEqualReducer.html">tvm::SEqualReducer</a></div><div class="ttdoc">A Reducer class to reduce the structural equality result of two objects. </div><div class="ttdef"><b>Definition:</b> structural_equal.h:102</div></div>
 <div class="ttc" id="classtvm_1_1TypedEnvFunc_html"><div class="ttname"><a href="classtvm_1_1TypedEnvFunc.html">tvm::TypedEnvFunc</a></div><div class="ttdoc">Please refer to TypedEnvFunc&lt;R(Args..)&gt;. </div><div class="ttdef"><b>Definition:</b> env_func.h:104</div></div>
diff --git a/docs/api/doxygen/generic__func_8h_source.html b/docs/api/doxygen/generic__func_8h_source.html
index 31cce02..8cbcc86 100644
--- a/docs/api/doxygen/generic__func_8h_source.html
+++ b/docs/api/doxygen/generic__func_8h_source.html
@@ -90,7 +90,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 </div><!--header-->
 <div class="contents">
 <a href="generic__func_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or mo [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:557</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:559</div></div>
 <div class="ttc" id="classtvm_1_1GenericFunc_html"><div class="ttname"><a href="classtvm_1_1GenericFunc.html">tvm::GenericFunc</a></div><div class="ttdoc">Generic function that can be specialized on a per-target basis. </div><div class="ttdef"><b>Definition:</b> generic_func.h:43</div></div>
 <div class="ttc" id="classtvm_1_1GenericFunc_html_a97c34a40c5059bdda64494d61f50602d"><div class="ttname"><a href="classtvm_1_1GenericFunc.html#a97c34a40c5059bdda64494d61f50602d">tvm::GenericFunc::set_default</a></div><div class="ttdeci">GenericFunc &amp; set_default(const runtime::PackedFunc value, bool allow_override=false)</div><div class="ttdoc">Set the default function implementaiton. </div></div>
 <div class="ttc" id="classtvm_1_1GenericFunc_html_a909acecbf2f34f847a34e587a4570dce"><div class="ttname"><a href="classtvm_1_1GenericFunc.html#a909acecbf2f34f847a34e587a4570dce">tvm::GenericFunc::RegisterGenericFunc</a></div><div class="ttdeci">static void RegisterGenericFunc(GenericFunc func, const std::string &amp;name)</div><div class="ttdoc">Add a GenericFunc instance to the registry. </div></div>
@@ -112,7 +112,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1GenericFunc_html_a4a04307dffa174f71cdfb08d1903dec2"><div class="ttname"><a href="classtvm_1_1GenericFunc.html#a4a04307dffa174f71cdfb08d1903dec2">tvm::GenericFunc::operator-&gt;</a></div><div class="ttdeci">GenericFuncNode * operator-&gt;()</div><div class="ttdoc">access the internal node container </div><div class="ttdef"><b>Definition:</b> generic_func.h:151</div></div>
 <div class="ttc" id="target_8h_html"><div class="ttname"><a href="target_8h.html">target.h</a></div><div class="ttdoc">Compilation target object. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html">tvm::runtime::PackedFunc</a></div><div class="ttdoc">Packed function is a type-erased function. The arguments are passed by packed format. </div><div class="ttdef"><b>Definition:</b> packed_func.h:75</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html">tvm::runtime::TVMArgsSetter</a></div><div class="ttdef"><b>Definition:</b> packed_func.h:1080</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html">tvm::runtime::TVMArgsSetter</a></div><div class="ttdef"><b>Definition:</b> packed_func.h:1082</div></div>
 <div class="ttc" id="classtvm_1_1GenericFunc_html_a74a6f06af50db51c3ff42fd493c44826"><div class="ttname"><a href="classtvm_1_1GenericFunc.html#a74a6f06af50db51c3ff42fd493c44826">tvm::GenericFunc::Get</a></div><div class="ttdeci">static GenericFunc Get(const std::string &amp;name)</div><div class="ttdoc">Find or register the GenericFunc instance corresponding to the give name. </div></div>
 <div class="ttc" id="packed__func_8h_html"><div class="ttname"><a href="packed__func_8h.html">packed_func.h</a></div><div class="ttdoc">Type-erased function used across TVM API. </div></div>
 <div class="ttc" id="with_8h_html"><div class="ttname"><a href="with_8h.html">with.h</a></div><div class="ttdoc">RAII wrapper function to enter and exit a context object similar to python&amp;#39;s with syntax...</div></div>
diff --git a/docs/api/doxygen/ir_2attrs_8h_source.html b/docs/api/doxygen/ir_2attrs_8h_source.html
index ad5cbbe..58e501e 100644
--- a/docs/api/doxygen/ir_2attrs_8h_source.html
+++ b/docs/api/doxygen/ir_2attrs_8h_source.html
@@ -89,12 +89,12 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">attrs.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="ir_2attrs_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more c [...]
+<a href="ir_2attrs_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more c [...]
 <div class="ttc" id="structtvm_1_1AttrError_html_a3285db0171872bc2fdde8243f6e801d9"><div class="ttname"><a href="structtvm_1_1AttrError.html#a3285db0171872bc2fdde8243f6e801d9">tvm::AttrError::AttrError</a></div><div class="ttdeci">AttrError(std::string msg)</div><div class="ttdoc">constructor </div><div class="ttdef"><b>Definition:</b> attrs.h:100</div></div>
 <div class="ttc" id="classtvm_1_1DictAttrsNode_html_a9b325fbc574606d832cca3b483bac572"><div class="ttname"><a href="classtvm_1_1DictAttrsNode.html#a9b325fbc574606d832cca3b483bac572">tvm::DictAttrsNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const </div><div class="ttdef"><b>Definition:</b> attrs.h:210</div></div>
 <div class="ttc" id="structtvm_1_1detail_1_1AttrInitEntry_html_a5608a2a457a397bf11f2be2776ec0653"><div class="ttname"><a href="structtvm_1_1detail_1_1AttrInitEntry.html#a5608a2a457a397bf11f2be2776ec0653">tvm::detail::AttrInitEntry::set_lower_bound</a></div><div class="ttdeci">TSelf &amp; set_lower_bound(const T &amp;begin)</div><div class="ttdef"><b>Definition:</b> attrs.h:363</div></div>
 <div class="ttc" id="structtvm_1_1detail_1_1AttrInitEntry_html_aaba94dddd1e9c367023dbe03e76634bf"><div class="ttname"><a href="structtvm_1_1detail_1_1AttrInitEntry.html#aaba94dddd1e9c367023dbe03e76634bf">tvm::detail::AttrInitEntry::value_missing_</a></div><div class="ttdeci">bool value_missing_</div><div class="ttdef"><b>Definition:</b> attrs.h:339</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:557</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:559</div></div>
 <div class="ttc" id="namespacetvm_1_1detail_html_acb3382242cbf538f64edae13e4ec5a84"><div class="ttname"><a href="namespacetvm_1_1detail.html#acb3382242cbf538f64edae13e4ec5a84">tvm::detail::SetValue&lt; uint64_t &gt;</a></div><div class="ttdeci">void SetValue&lt; uint64_t &gt;(uint64_t *ptr, const TVMArgValue &amp;val)</div><div class="ttdef"><b>Definition:</b> attrs.h:447</div></div>
 <div class="ttc" id="namespacetvm_1_1detail_html_addd17cedbd26f5b4b257d7651ca5b6fe"><div class="ttname"><a href="namespacetvm_1_1detail.html#addd17cedbd26f5b4b257d7651ca5b6fe">tvm::detail::SetValue&lt; bool &gt;</a></div><div class="ttdeci">void SetValue&lt; bool &gt;(bool *ptr, const TVMArgValue &amp;val)</div><div class="ttdef"><b>Definition:</b> attrs.h:451</div></div>
 <div class="ttc" id="classtvm_1_1detail_1_1AttrNormalVisitor_html_a6ba81b2db584e6625a7ccf04d4bd04ed"><div class="ttname"><a href="classtvm_1_1detail_1_1AttrNormalVisitor.html#a6ba81b2db584e6625a7ccf04d4bd04ed">tvm::detail::AttrNormalVisitor::AttrNormalVisitor</a></div><div class="ttdeci">AttrNormalVisitor(AttrVisitor *visitor)</div><div class="ttdef"><b>Definition:</b> attrs.h:278</div></div>
@@ -168,6 +168,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="structtvm_1_1detail_1_1AttrTriggerNonDefaultEntry_html_ae3ca1012c8502b2d3132cc81530c6c0e"><div class="ttname"><a href="structtvm_1_1detail_1_1AttrTriggerNonDefaultEntry.html#ae3ca1012c8502b2d3132cc81530c6c0e">tvm::detail::AttrTriggerNonDefaultEntry::~AttrTriggerNonDefaultEntry</a></div><div class="ttdeci">~AttrTriggerNonDefaultEntry() DMLC_THROW_EXCEPTION</div><div class="ttdef"><b>Definition:</b> attrs.h:609</div></div>
 <div class="ttc" id="classtvm_1_1AttrsNode_html_ad6cc4d8a955e30b9f55c0b5367ccce38"><div class="ttname"><a href="classtvm_1_1AttrsNode.html#ad6cc4d8a955e30b9f55c0b5367ccce38">tvm::AttrsNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reducer) const </div><div class="ttdef"><b>Definition:</b> attrs.h:727</div></div>
 <div class="ttc" id="classtvm_1_1detail_1_1AttrExistVisitor_html_a42cfd0949c298dea06fb2c4fb39e188d"><div class="ttname"><a href="classtvm_1_1detail_1_1AttrExistVisitor.html#a42cfd0949c298dea06fb2c4fb39e188d">tvm::detail::AttrExistVisitor::key_</a></div><div class="ttdeci">std::string key_</div><div class="ttdef"><b>Definition:</b> attrs.h:591</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1String_html_a2ee7733b1c8092383ffab8c67bf8cb20"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html#a2ee7733b1c8092383ffab8c67bf8cb20">tvm::runtime::String::CanConvertFrom</a></div><div class="ttdeci">static bool CanConvertFrom(const TVMArgValue &amp;val)</div><div class="ttdoc">Check if a TVMArgValue can be converted to String, i.e. it can be std::string or String. </div><div class="ttdef"><b>Definition:</b> packed_func.h:1497</div></div>
 <div class="ttc" id="namespacetvm_html_a28c693333c2b15702b1a9a57dec0fbf5"><div class="ttname"><a href="namespacetvm.html#a28c693333c2b15702b1a9a57dec0fbf5">tvm::NullValue&lt; DataType &gt;</a></div><div class="ttdeci">DataType NullValue&lt; DataType &gt;()</div><div class="ttdef"><b>Definition:</b> attrs.h:90</div></div>
 <div class="ttc" id="object_8h_html_af8330e3864503fb7c4133ae4d48fe4a2"><div class="ttname"><a href="object_8h.html#af8330e3864503fb7c4133ae4d48fe4a2">TVM_DEFINE_OBJECT_REF_COW_METHOD</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_COW_METHOD(ObjectName)</div><div class="ttdoc">Define CopyOnWrite function in an ObjectRef. </div><div class="ttdef"><b>Definition:</b> object.h:757</div></div>
 <div class="ttc" id="classtvm_1_1AttrsNode_html_af8192054af4e2797953cc7b67625092f"><div class="ttname"><a href="classtvm_1_1AttrsNode.html#af8192054af4e2797953cc7b67625092f">tvm::AttrsNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const DerivedType *other, SEqualReducer equal) const </div><div class="ttdef"><b>Definition:</b> attrs.h:720</div></div>
diff --git a/docs/api/doxygen/ir_2expr_8h_source.html b/docs/api/doxygen/ir_2expr_8h_source.html
index 9d60c5e..aa4a04f 100644
--- a/docs/api/doxygen/ir_2expr_8h_source.html
+++ b/docs/api/doxygen/ir_2expr_8h_source.html
@@ -96,7 +96,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1RangeNode_html_a43d2fb12bb61cf05936a1972d0158b49"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a43d2fb12bb61cf05936a1972d0158b49">tvm::RangeNode::min</a></div><div class="ttdeci">PrimExpr min</div><div class="ttdoc">beginning of the node </div><div class="ttdef"><b>Definition:</b> expr.h:412</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectPtr_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectPtr.html">tvm::runtime::ObjectPtr</a></div><div class="ttdoc">A custom smart pointer for Object. </div><div class="ttdef"><b>Definition:</b> object.h:350</div></div>
 <div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:322</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a918b5a9ef8fcd65d649c7dd41ff12d9f"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a918b5a9ef8fcd65d649c7dd41ff12d9f">tvm::runtime::TVMPODValue_::AsObjectRef</a></div><div class="ttdeci">TObjectRef AsObjectRef() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1358</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a918b5a9ef8fcd65d649c7dd41ff12d9f"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a918b5a9ef8fcd65d649c7dd41ff12d9f">tvm::runtime::TVMPODValue_::AsObjectRef</a></div><div class="ttdeci">TObjectRef AsObjectRef() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1360</div></div>
 <div class="ttc" id="node_8h_html"><div class="ttname"><a href="node_8h.html">node.h</a></div><div class="ttdoc">Definitions and helper macros for IR/AST nodes. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a5a799e4197f227549cd641b0e753f9b8"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a5a799e4197f227549cd641b0e753f9b8">tvm::runtime::TVMPODValue_::type_code</a></div><div class="ttdeci">int type_code() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:425</div></div>
 <div class="ttc" id="namespacetvm_html_a002710a4652156a57495e10a09b5d002"><div class="ttname"><a href="namespacetvm.html#a002710a4652156a57495e10a09b5d002">tvm::operator||</a></div><div class="ttdeci">Bool operator||(const Bool &amp;a, bool b)</div><div class="ttdef"><b>Definition:</b> expr.h:332</div></div>
@@ -178,7 +178,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:136</div></div>
 <div class="ttc" id="classtvm_1_1Integer_html_a8924d79df4414d035aafc92310fbc335"><div class="ttname"><a href="classtvm_1_1Integer.html#a8924d79df4414d035aafc92310fbc335">tvm::Integer::operator!=</a></div><div class="ttdeci">Bool operator!=(Enum other) const </div><div class="ttdef"><b>Definition:</b> expr.h:403</div></div>
 <div class="ttc" id="object_8h_html_a782d0de62fbf75736e29c1e79c22c7f1"><div class="ttname"><a href="object_8h.html#a782d0de62fbf75736e29c1e79c22c7f1">TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:715</div></div>
-<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_html"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html">tvm::runtime::PackedFuncValueConverter</a></div><div class="ttdoc">Type trait to specify special value conversion rules from TVMArgValue and TVMRetValue. </div><div class="ttdef"><b>Definition:</b> packed_func.h:860</div></div>
+<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_html"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html">tvm::runtime::PackedFuncValueConverter</a></div><div class="ttdoc">Type trait to specify special value conversion rules from TVMArgValue and TVMRetValue. </div><div class="ttdef"><b>Definition:</b> packed_func.h:862</div></div>
 <div class="ttc" id="classtvm_1_1PrimExprNode_html"><div class="ttname"><a href="classtvm_1_1PrimExprNode.html">tvm::PrimExprNode</a></div><div class="ttdoc">Base node of all primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:76</div></div>
 <div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:351</div></div>
 <div class="ttc" id="classtvm_1_1RangeNode_html"><div class="ttname"><a href="classtvm_1_1RangeNode.html">tvm::RangeNode</a></div><div class="ttdoc">range over one dimension </div><div class="ttdef"><b>Definition:</b> expr.h:409</div></div>
diff --git a/docs/api/doxygen/ir_2op_8h_source.html b/docs/api/doxygen/ir_2op_8h_source.html
index 5b19766..8fbb4e3 100644
--- a/docs/api/doxygen/ir_2op_8h_source.html
+++ b/docs/api/doxygen/ir_2op_8h_source.html
@@ -91,7 +91,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="contents">
 <a href="ir_2op_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more cont [...]
 <div class="ttc" id="classtvm_1_1TypeReporter_html"><div class="ttname"><a href="classtvm_1_1TypeReporter.html">tvm::TypeReporter</a></div><div class="ttdoc">Container class of TypeReporter. </div><div class="ttdef"><b>Definition:</b> type_relation.h:139</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:557</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:559</div></div>
 <div class="ttc" id="classtvm_1_1OpNode_html_a4103a03a5b962b019e8274ddad61dc38"><div class="ttname"><a href="classtvm_1_1OpNode.html#a4103a03a5b962b019e8274ddad61dc38">tvm::OpNode::name</a></div><div class="ttdeci">String name</div><div class="ttdoc">name of the operator </div><div class="ttdef"><b>Definition:</b> op.h:61</div></div>
 <div class="ttc" id="classtvm_1_1TypeRelationNode_html"><div class="ttname"><a href="classtvm_1_1TypeRelationNode.html">tvm::TypeRelationNode</a></div><div class="ttdoc">User defined type relation, it is an input-output relation on types. </div><div class="ttdef"><b>Definition:</b> type_relation.h:179</div></div>
 <div class="ttc" id="classtvm_1_1OpNode_html_ade027d3176f48e7127be6ff08c723306"><div class="ttname"><a href="classtvm_1_1OpNode.html#ade027d3176f48e7127be6ff08c723306">tvm::OpNode::TVM_DECLARE_FINAL_OBJECT_INFO</a></div><div class="ttdeci">TVM_DECLARE_FINAL_OBJECT_INFO(OpNode, RelayExprNode)</div></div>
diff --git a/docs/api/doxygen/namespacemembers_func_r.html b/docs/api/doxygen/namespacemembers_func_r.html
index b6e803f..c17e095 100644
--- a/docs/api/doxygen/namespacemembers_func_r.html
+++ b/docs/api/doxygen/namespacemembers_func_r.html
@@ -168,7 +168,10 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 : <a class="el" href="namespacetvm_1_1tir_1_1transform.html#a4fe43327c4454dd05b6e925577443f49">tvm::tir::transform</a>
 </li>
 <li>right_shift()
-: <a class="el" href="namespacetvm_1_1topi.html#aec8705eed0238733dc89e2a34465e9d0">tvm::topi</a>
+: <a class="el" href="namespacetvm_1_1topi.html#a9673b9caffb46404b566c3f04a492dfe">tvm::topi</a>
+</li>
+<li>rocblas_batch_matmul()
+: <a class="el" href="namespacetvm_1_1topi_1_1contrib.html#abf1113dd429e1285752b48f62fe12848">tvm::topi::contrib</a>
 </li>
 <li>rocblas_matmul()
 : <a class="el" href="namespacetvm_1_1topi_1_1contrib.html#abefad1f2ad083fc038566a9ef6278dff">tvm::topi::contrib</a>
diff --git a/docs/api/doxygen/namespacemembers_r.html b/docs/api/doxygen/namespacemembers_r.html
index 7f89c67..1f92c7c 100644
--- a/docs/api/doxygen/namespacemembers_r.html
+++ b/docs/api/doxygen/namespacemembers_r.html
@@ -186,7 +186,10 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 : <a class="el" href="namespacetvm_1_1tir_1_1transform.html#a4fe43327c4454dd05b6e925577443f49">tvm::tir::transform</a>
 </li>
 <li>right_shift()
-: <a class="el" href="namespacetvm_1_1topi.html#aec8705eed0238733dc89e2a34465e9d0">tvm::topi</a>
+: <a class="el" href="namespacetvm_1_1topi.html#af4d241b8705ec1d62785900224f77aab">tvm::topi</a>
+</li>
+<li>rocblas_batch_matmul()
+: <a class="el" href="namespacetvm_1_1topi_1_1contrib.html#abf1113dd429e1285752b48f62fe12848">tvm::topi::contrib</a>
 </li>
 <li>rocblas_matmul()
 : <a class="el" href="namespacetvm_1_1topi_1_1contrib.html#abefad1f2ad083fc038566a9ef6278dff">tvm::topi::contrib</a>
diff --git a/docs/api/doxygen/namespacetvm_1_1topi_1_1contrib.html b/docs/api/doxygen/namespacetvm_1_1topi_1_1contrib.html
index b9715ca..8e75eb8 100644
--- a/docs/api/doxygen/namespacetvm_1_1topi_1_1contrib.html
+++ b/docs/api/doxygen/namespacetvm_1_1topi_1_1contrib.html
@@ -103,6 +103,9 @@ Functions</h2></td></tr>
 <tr class="memitem:abefad1f2ad083fc038566a9ef6278dff"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm_1_1topi_1_1contrib.html#abefad1f2ad083fc038566a9ef6278dff">rocblas_matmul</a> (const <a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> &amp;lhs, const <a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> &amp;rhs, bool t [...]
 <tr class="memdesc:abefad1f2ad083fc038566a9ef6278dff"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create an op that multiplies lhs and rhs with rocBLAS.  <a href="#abefad1f2ad083fc038566a9ef6278dff">More...</a><br /></td></tr>
 <tr class="separator:abefad1f2ad083fc038566a9ef6278dff"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:abf1113dd429e1285752b48f62fe12848"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm_1_1topi_1_1contrib.html#abf1113dd429e1285752b48f62fe12848">rocblas_batch_matmul</a> (const <a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> &amp;lhs, const <a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> &amp;rhs,  [...]
+<tr class="memdesc:abf1113dd429e1285752b48f62fe12848"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create an op that batch multiplies lhs and rhs with rocBLAS.  <a href="#abf1113dd429e1285752b48f62fe12848">More...</a><br /></td></tr>
+<tr class="separator:abf1113dd429e1285752b48f62fe12848"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 <h2 class="groupheader">Function Documentation</h2>
 <a class="anchor" id="a73b932a26374350f8a1b75b092c92091"></a>
@@ -221,6 +224,64 @@ Functions</h2></td></tr>
 
 </div>
 </div>
+<a class="anchor" id="abf1113dd429e1285752b48f62fe12848"></a>
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname"><a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> tvm::topi::contrib::rocblas_batch_matmul </td>
+          <td>(</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> &amp;&#160;</td>
+          <td class="paramname"><em>lhs</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> &amp;&#160;</td>
+          <td class="paramname"><em>rhs</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">bool&#160;</td>
+          <td class="paramname"><em>transa</em>, </td>
+        </tr>
+        <tr>
+          <td class="paramkey"></td>
+          <td></td>
+          <td class="paramtype">bool&#160;</td>
+          <td class="paramname"><em>transb</em>&#160;</td>
+        </tr>
+        <tr>
+          <td></td>
+          <td>)</td>
+          <td></td><td></td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">inline</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+<p>Create an op that batch multiplies lhs and rhs with rocBLAS. </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">lhs</td><td>The left matrix operand e.g. (batch_size, M, K) </td></tr>
+    <tr><td class="paramname">rhs</td><td>The right matrix operand e.g. (batch_size, K, N) </td></tr>
+    <tr><td class="paramname">transa</td><td>Whether to transpose lhs </td></tr>
+    <tr><td class="paramname">transb</td><td>Whether to transpose rhs</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>The output tensor </dd></dl>
+
+</div>
+</div>
 <a class="anchor" id="abefad1f2ad083fc038566a9ef6278dff"></a>
 <div class="memitem">
 <div class="memproto">
diff --git a/docs/api/doxygen/ndarray_8h_source.html b/docs/api/doxygen/ndarray_8h_source.html
index f8453c6..7eb4ee0 100644
--- a/docs/api/doxygen/ndarray_8h_source.html
+++ b/docs/api/doxygen/ndarray_8h_source.html
@@ -89,17 +89,17 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">ndarray.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="ndarray_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more con [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a9b53b82c11b6eedb4e1f11d6cf769651"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a9b53b82c11b6eedb4e1f11d6cf769651">tvm::runtime::NDArray::CopyTo</a></div><div class="ttdeci">void CopyTo(DLTensor *other) const </div><div class="ttdoc">Copy data content into another array. </div><div class="ttdef"><b>Definition:</b> ndarray.h:340</div></div>
+<a href="ndarray_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more con [...]
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a9b53b82c11b6eedb4e1f11d6cf769651"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a9b53b82c11b6eedb4e1f11d6cf769651">tvm::runtime::NDArray::CopyTo</a></div><div class="ttdeci">void CopyTo(DLTensor *other) const </div><div class="ttdoc">Copy data content into another array. </div><div class="ttdef"><b>Definition:</b> ndarray.h:338</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a368d0f65ebf07dc2ee5a717a85b3bc60"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a368d0f65ebf07dc2ee5a717a85b3bc60">tvm::runtime::NDArray::CreateView</a></div><div class="ttdeci">NDArray CreateView(std::vector&lt; int64_t &gt; shape, DLDataType dtype)</div><div class="ttdoc">Create a NDArray that shares the data memory with the current one. </div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:557</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ade0e2757904f4f5ba5c667ae01793a47"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ade0e2757904f4f5ba5c667ae01793a47">tvm::runtime::NDArray::FFIDecRef</a></div><div class="ttdeci">static void FFIDecRef(TVMArrayHandle handle)</div><div class="ttdoc">DecRef resource managed by an FFI array handle. </div><div class="ttdef"><b>Definition:</b> ndarray.h:380</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:559</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ade0e2757904f4f5ba5c667ae01793a47"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ade0e2757904f4f5ba5c667ae01793a47">tvm::runtime::NDArray::FFIDecRef</a></div><div class="ttdeci">static void FFIDecRef(TVMArrayHandle handle)</div><div class="ttdoc">DecRef resource managed by an FFI array handle. </div><div class="ttdef"><b>Definition:</b> ndarray.h:378</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectPtr_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectPtr.html">tvm::runtime::ObjectPtr</a></div><div class="ttdoc">A custom smart pointer for Object. </div><div class="ttdef"><b>Definition:</b> object.h:350</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ad78792a1e1feb160b0be4474a4c13a4c"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ad78792a1e1feb160b0be4474a4c13a4c">tvm::runtime::NDArray::Load</a></div><div class="ttdeci">bool Load(dmlc::Stream *stream)</div><div class="ttdoc">Load NDArray from stream. </div><div class="ttdef"><b>Definition:</b> ndarray.h:440</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1Container_html_a39edb3823aeabc846cc38e0fae1a3c37"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1Container.html#a39edb3823aeabc846cc38e0fae1a3c37">tvm::runtime::NDArray::Container::Container</a></div><div class="ttdeci">Container(void *data, std::vector&lt; int64_t &gt; shape, DLDataType dtype, DLContext ctx)</div><div class="ttdef"><b>Definition:</b> ndarray.h:258</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ad78792a1e1feb160b0be4474a4c13a4c"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ad78792a1e1feb160b0be4474a4c13a4c">tvm::runtime::NDArray::Load</a></div><div class="ttdeci">bool Load(dmlc::Stream *stream)</div><div class="ttdoc">Load NDArray from stream. </div><div class="ttdef"><b>Definition:</b> ndarray.h:438</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1Container_html_a39edb3823aeabc846cc38e0fae1a3c37"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1Container.html#a39edb3823aeabc846cc38e0fae1a3c37">tvm::runtime::NDArray::Container::Container</a></div><div class="ttdeci">Container(void *data, std::vector&lt; int64_t &gt; shape, DLDataType dtype, DLContext ctx)</div><div class="ttdef"><b>Definition:</b> ndarray.h:256</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html">tvm::runtime::TVMPODValue_</a></div><div class="ttdoc">Internal base class to handle conversion to POD values. </div><div class="ttdef"><b>Definition:</b> packed_func.h:363</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a04129f44f5d17ab63a10e107a939f282"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a04129f44f5d17ab63a10e107a939f282">tvm::runtime::NDArray::Shape</a></div><div class="ttdeci">std::vector&lt; int64_t &gt; Shape() const </div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a1550151d3616e918d45e047840b81e1e"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a1550151d3616e918d45e047840b81e1e">tvm::runtime::NDArray::CopyFrom</a></div><div class="ttdeci">void CopyFrom(const DLTensor *other)</div><div class="ttdoc">Copy data content from another array. </div><div class="ttdef"><b>Definition:</b> ndarray.h:329</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a1550151d3616e918d45e047840b81e1e"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a1550151d3616e918d45e047840b81e1e">tvm::runtime::NDArray::CopyFrom</a></div><div class="ttdeci">void CopyFrom(const DLTensor *other)</div><div class="ttdoc">Copy data content from another array. </div><div class="ttdef"><b>Definition:</b> ndarray.h:327</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a73ca58cb32f4a4adf71d274dc1e27be4"><div class="ttname"><a href="c__runtime__api_8h.html#a73ca58cb32f4a4adf71d274dc1e27be4">TVMArrayHandle</a></div><div class="ttdeci">DLTensor * TVMArrayHandle</div><div class="ttdoc">the array handle </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:138</div></div>
 <div class="ttc" id="serializer_8h_html"><div class="ttname"><a href="serializer_8h.html">serializer.h</a></div><div class="ttdoc">Serializer extension to support TVM data types Include this file to enable serialization of DLDataTyp...</div></div>
@@ -107,48 +107,48 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a11ad7a277c175ee33e1551bbb77a8694"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a11ad7a277c175ee33e1551bbb77a8694">tvm::runtime::ObjectRef::get</a></div><div class="ttdeci">const Object * get() const </div><div class="ttdef"><b>Definition:</b> object.h:533</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_ae246eaa00342c042f3f194605ad9bc7a"><div class="ttname"><a href="c__runtime__api_8h.html#ae246eaa00342c042f3f194605ad9bc7a">TVMArrayCopyToBytes</a></div><div class="ttdeci">int TVMArrayCopyToBytes(TVMArrayHandle handle, void *data, size_t nbytes)</div><div class="ttdoc">Copy array data to CPU byte array. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></div><div class="ttdoc">base class of all object containers. </div><div class="ttdef"><b>Definition:</b> object.h:163</div></div>
-<div class="ttc" id="namespacetvm_1_1runtime_html_afdd7050eda88b079f0a962bd413a34ea"><div class="ttname"><a href="namespacetvm_1_1runtime.html#afdd7050eda88b079f0a962bd413a34ea">tvm::runtime::TVMArrayHandleToObjectHandle</a></div><div class="ttdeci">Object * TVMArrayHandleToObjectHandle(TVMArrayHandle handle)</div><div class="ttdef"><b>Definition:</b> ndarray.h:384</div></div>
+<div class="ttc" id="namespacetvm_1_1runtime_html_afdd7050eda88b079f0a962bd413a34ea"><div class="ttname"><a href="namespacetvm_1_1runtime.html#afdd7050eda88b079f0a962bd413a34ea">tvm::runtime::TVMArrayHandleToObjectHandle</a></div><div class="ttdeci">Object * TVMArrayHandleToObjectHandle(TVMArrayHandle handle)</div><div class="ttdef"><b>Definition:</b> ndarray.h:382</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a775383bcd8c0237e36bdf0c9654d62c3"><div class="ttname"><a href="c__runtime__api_8h.html#a775383bcd8c0237e36bdf0c9654d62c3">TVMGetLastError</a></div><div class="ttdeci">const char * TVMGetLastError(void)</div><div class="ttdoc">return str message of the last error all function in this file will return 0 when success and -1 when...</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_af2a8ccab95d432d1ecad7a389e11bcd3"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#af2a8ccab95d432d1ecad7a389e11bcd3">tvm::runtime::NDArray::reset</a></div><div class="ttdeci">void reset()</div><div class="ttdoc">reset the content of NDArray to be nullptr </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html">tvm::runtime::NDArray</a></div><div class="ttdoc">Managed NDArray. The array is backed by reference counted blocks. </div><div class="ttdef"><b>Definition:</b> ndarray.h:43</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_ab1d5f6b7945e1410602a8a057fda5757"><div class="ttname"><a href="c__runtime__api_8h.html#ab1d5f6b7945e1410602a8a057fda5757">TVMStreamHandle</a></div><div class="ttdeci">void * TVMStreamHandle</div><div class="ttdoc">The stream that is specific to device can be NULL, which indicates the default one. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:172</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase_html_a852a3d49f916098ea6012237dbd242fc"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html#a852a3d49f916098ea6012237dbd242fc">tvm::runtime::NDArray::ContainerBase::shape_</a></div><div class="ttdeci">std::vector&lt; int64_t &gt; shape_</div><div class="ttdoc">The shape container, can be used used for shape data. </div><div class="ttdef"><b>Definition:</b> ndarray.h:238</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase_html_a852a3d49f916098ea6012237dbd242fc"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html#a852a3d49f916098ea6012237dbd242fc">tvm::runtime::NDArray::ContainerBase::shape_</a></div><div class="ttdeci">std::vector&lt; int64_t &gt; shape_</div><div class="ttdoc">The shape container, can be used used for shape data. </div><div class="ttdef"><b>Definition:</b> ndarray.h:236</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a85576f944fc19e054d94add23e22a121"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a85576f944fc19e054d94add23e22a121">tvm::runtime::NDArray::DataType</a></div><div class="ttdeci">runtime::DataType DataType() const </div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ac041c89504299f82baf990c9ad081c44"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ac041c89504299f82baf990c9ad081c44">tvm::runtime::NDArray::IsContiguous</a></div><div class="ttdeci">bool IsContiguous() const </div><div class="ttdef"><b>Definition:</b> ndarray.h:325</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ac041c89504299f82baf990c9ad081c44"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ac041c89504299f82baf990c9ad081c44">tvm::runtime::NDArray::IsContiguous</a></div><div class="ttdeci">bool IsContiguous() const </div><div class="ttdef"><b>Definition:</b> ndarray.h:323</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a69726ab0ad5204fea12960dd43b4f8e2"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a69726ab0ad5204fea12960dd43b4f8e2">tvm::runtime::NDArray::CopyToBytes</a></div><div class="ttdeci">void CopyToBytes(void *data, size_t nbytes) const </div><div class="ttdoc">Copy data content into another array. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_a70fb5361147634605d6595bb89381f03"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#a70fb5361147634605d6595bb89381f03">tvm::runtime::Object::DecRef</a></div><div class="ttdeci">void DecRef()</div><div class="ttdoc">developer function, decrease reference counter. </div><div class="ttdef"><b>Definition:</b> object.h:773</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_ad94d79729ac85aa7c976e23d39066383"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#ad94d79729ac85aa7c976e23d39066383">tvm::runtime::Object::RuntimeTypeIndex</a></div><div class="ttdeci">static uint32_t RuntimeTypeIndex()</div><div class="ttdef"><b>Definition:</b> object.h:217</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ae2a878fb8c847666d2318b979714cefa"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ae2a878fb8c847666d2318b979714cefa">tvm::runtime::NDArray::CopyFromTo</a></div><div class="ttdeci">static void CopyFromTo(const DLTensor *from, DLTensor *to, TVMStreamHandle stream=nullptr)</div><div class="ttdoc">Function to copy data from one array to another. </div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_acc22b8911190353ab1050cccf3d015f2"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#acc22b8911190353ab1050cccf3d015f2">tvm::runtime::NDArray::Save</a></div><div class="ttdeci">void Save(dmlc::Stream *stream) const </div><div class="ttdoc">Save NDArray to stream. </div><div class="ttdef"><b>Definition:</b> ndarray.h:438</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ab76ba9c638e1d6db8d6f0ba8c4d38670"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ab76ba9c638e1d6db8d6f0ba8c4d38670">tvm::runtime::NDArray::FFIDataFromHandle</a></div><div class="ttdeci">static ObjectPtr&lt; Object &gt; FFIDataFromHandle(TVMArrayHandle handle)</div><div class="ttdoc">Construct NDArray&amp;#39;s Data field from array handle in FFI. </div><div class="ttdef"><b>Definition:</b> ndarray.h:368</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_acc22b8911190353ab1050cccf3d015f2"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#acc22b8911190353ab1050cccf3d015f2">tvm::runtime::NDArray::Save</a></div><div class="ttdeci">void Save(dmlc::Stream *stream) const </div><div class="ttdoc">Save NDArray to stream. </div><div class="ttdef"><b>Definition:</b> ndarray.h:436</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ab76ba9c638e1d6db8d6f0ba8c4d38670"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ab76ba9c638e1d6db8d6f0ba8c4d38670">tvm::runtime::NDArray::FFIDataFromHandle</a></div><div class="ttdeci">static ObjectPtr&lt; Object &gt; FFIDataFromHandle(TVMArrayHandle handle)</div><div class="ttdoc">Construct NDArray&amp;#39;s Data field from array handle in FFI. </div><div class="ttdef"><b>Definition:</b> ndarray.h:366</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_af5801a105ceb450616a83d19c5c92326"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#af5801a105ceb450616a83d19c5c92326">tvm::runtime::NDArray::NDArray</a></div><div class="ttdeci">NDArray(ObjectPtr&lt; Object &gt; data)</div><div class="ttdoc">constructor. </div><div class="ttdef"><b>Definition:</b> ndarray.h:57</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html">tvm::runtime::DataType</a></div><div class="ttdoc">Runtime primitive data type. </div><div class="ttdef"><b>Definition:</b> data_type.h:41</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1Container_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1Container.html">tvm::runtime::NDArray::Container</a></div><div class="ttdoc">Object container class that backs NDArray. </div><div class="ttdef"><b>Definition:</b> ndarray.h:245</div></div>
-<div class="ttc" id="namespacetvm_1_1runtime_html_adb2ed1227b418f5846d43d3234b52391"><div class="ttname"><a href="namespacetvm_1_1runtime.html#adb2ed1227b418f5846d43d3234b52391">tvm::runtime::IsContiguous</a></div><div class="ttdeci">bool IsContiguous(const DLTensor &amp;arr)</div><div class="ttdoc">check if a DLTensor is contiguous. </div><div class="ttdef"><b>Definition:</b> ndarray.h:314</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1Container_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1Container.html">tvm::runtime::NDArray::Container</a></div><div class="ttdoc">Object container class that backs NDArray. </div><div class="ttdef"><b>Definition:</b> ndarray.h:243</div></div>
+<div class="ttc" id="namespacetvm_1_1runtime_html_adb2ed1227b418f5846d43d3234b52391"><div class="ttname"><a href="namespacetvm_1_1runtime.html#adb2ed1227b418f5846d43d3234b52391">tvm::runtime::IsContiguous</a></div><div class="ttdeci">bool IsContiguous(const DLTensor &amp;arr)</div><div class="ttdoc">check if a DLTensor is contiguous. </div><div class="ttdef"><b>Definition:</b> ndarray.h:312</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_ac261cdb80487fb29ac42b28678f8cbef"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#ac261cdb80487fb29ac42b28678f8cbef">tvm::runtime::ObjectRef::data_</a></div><div class="ttdeci">ObjectPtr&lt; Object &gt; data_</div><div class="ttdoc">Internal pointer that backs the reference. </div><div class="ttdef"><b>Definition:</b> object.h:561</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase_html_a1063a9d01075d5b7b0e8fa31d4d72e0b"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html#a1063a9d01075d5b7b0e8fa31d4d72e0b">tvm::runtime::NDArray::ContainerBase::dl_tensor</a></div><div class="ttdeci">DLTensor dl_tensor</div><div class="ttdoc">The corresponding dl_tensor field. </div><div class="ttdef"><b>Definition:</b> ndarray.h:223</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1Container_html_a56109cfc826b26172f084c3790144351"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1Container.html#a56109cfc826b26172f084c3790144351">tvm::runtime::NDArray::Container::SetDeleter</a></div><div class="ttdeci">void SetDeleter(FDeleter deleter)</div><div class="ttdoc">Set the deleter field. </div><div class="ttdef"><b>Definition:</b> ndarray.h:274</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase_html_a1063a9d01075d5b7b0e8fa31d4d72e0b"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html#a1063a9d01075d5b7b0e8fa31d4d72e0b">tvm::runtime::NDArray::ContainerBase::dl_tensor</a></div><div class="ttdeci">DLTensor dl_tensor</div><div class="ttdoc">The corresponding dl_tensor field. </div><div class="ttdef"><b>Definition:</b> ndarray.h:221</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1Container_html_a56109cfc826b26172f084c3790144351"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1Container.html#a56109cfc826b26172f084c3790144351">tvm::runtime::NDArray::Container::SetDeleter</a></div><div class="ttdeci">void SetDeleter(FDeleter deleter)</div><div class="ttdoc">Set the deleter field. </div><div class="ttdef"><b>Definition:</b> ndarray.h:272</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_af30c02f3a3f37c7963b3af60fb9c72a1"><div class="ttname"><a href="namespacetvm_1_1topi.html#af30c02f3a3f37c7963b3af60fb9c72a1">tvm::topi::shape</a></div><div class="ttdeci">Tensor shape(const Tensor &amp;src, DataType dtype, const std::string name=&quot;T_shape&quot;, const std::string tag=kInjective)</div><div class="ttdoc">Get the shape of input tensor. </div><div class="ttdef"><b>Definition:</b> transform.h:1385</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ada8b2d6986e62b733f2c6c9c03df2dfe"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ada8b2d6986e62b733f2c6c9c03df2dfe">tvm::runtime::NDArray::ToDLPack</a></div><div class="ttdeci">DLManagedTensor * ToDLPack() const </div><div class="ttdoc">Create a reference view of NDArray that represents as DLManagedTensor. </div></div>
 <div class="ttc" id="c__runtime__api_8h_html_ace8007daffd9f2c6d954c24d870bfcc4"><div class="ttname"><a href="c__runtime__api_8h.html#ace8007daffd9f2c6d954c24d870bfcc4">tvm_index_t</a></div><div class="ttdeci">int64_t tvm_index_t</div><div class="ttdoc">type of array index. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:76</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ae347be13193ee72c6bde285c6ae787ad"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ae347be13193ee72c6bde285c6ae787ad">tvm::runtime::NDArray::operator-&gt;</a></div><div class="ttdeci">const DLTensor * operator-&gt;() const </div><div class="ttdef"><b>Definition:</b> ndarray.h:362</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ae347be13193ee72c6bde285c6ae787ad"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ae347be13193ee72c6bde285c6ae787ad">tvm::runtime::NDArray::operator-&gt;</a></div><div class="ttdeci">const DLTensor * operator-&gt;() const </div><div class="ttdef"><b>Definition:</b> ndarray.h:360</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html">tvm::runtime::ObjectRef</a></div><div class="ttdoc">Base class of all object reference. </div><div class="ttdef"><b>Definition:</b> object.h:498</div></div>
 <div class="ttc" id="object_8h_html"><div class="ttname"><a href="object_8h.html">object.h</a></div><div class="ttdoc">A managed object in the TVM runtime. </div></div>
 <div class="ttc" id="data__type_8h_html"><div class="ttname"><a href="data__type_8h.html">data_type.h</a></div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a141e032d848c60f8261046304bdc8c4c"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a141e032d848c60f8261046304bdc8c4c">tvm::runtime::NDArray::FFIGetHandle</a></div><div class="ttdeci">static TVMArrayHandle FFIGetHandle(const ObjectRef &amp;nd)</div><div class="ttdoc">Get FFI Array handle from ndarray. </div><div class="ttdef"><b>Definition:</b> ndarray.h:373</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1Container_html_a39b39ce5a2a658b44944381f1835404a"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1Container.html#a39b39ce5a2a658b44944381f1835404a">tvm::runtime::NDArray::Container::Container</a></div><div class="ttdeci">Container()</div><div class="ttdoc">default constructor </div><div class="ttdef"><b>Definition:</b> ndarray.h:248</div></div>
-<div class="ttc" id="namespacetvm_1_1runtime_html_acf4599f17bfe79ae1fe8afc1af053b43"><div class="ttname"><a href="namespacetvm_1_1runtime.html#acf4599f17bfe79ae1fe8afc1af053b43">tvm::runtime::kTVMNDArrayMagic</a></div><div class="ttdeci">constexpr uint64_t kTVMNDArrayMagic</div><div class="ttdoc">Magic number for NDArray file. </div><div class="ttdef"><b>Definition:</b> ndarray.h:389</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a141e032d848c60f8261046304bdc8c4c"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a141e032d848c60f8261046304bdc8c4c">tvm::runtime::NDArray::FFIGetHandle</a></div><div class="ttdeci">static TVMArrayHandle FFIGetHandle(const ObjectRef &amp;nd)</div><div class="ttdoc">Get FFI Array handle from ndarray. </div><div class="ttdef"><b>Definition:</b> ndarray.h:371</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1Container_html_a39b39ce5a2a658b44944381f1835404a"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1Container.html#a39b39ce5a2a658b44944381f1835404a">tvm::runtime::NDArray::Container::Container</a></div><div class="ttdeci">Container()</div><div class="ttdoc">default constructor </div><div class="ttdef"><b>Definition:</b> ndarray.h:246</div></div>
+<div class="ttc" id="namespacetvm_1_1runtime_html_acf4599f17bfe79ae1fe8afc1af053b43"><div class="ttname"><a href="namespacetvm_1_1runtime.html#acf4599f17bfe79ae1fe8afc1af053b43">tvm::runtime::kTVMNDArrayMagic</a></div><div class="ttdeci">constexpr uint64_t kTVMNDArrayMagic</div><div class="ttdoc">Magic number for NDArray file. </div><div class="ttdef"><b>Definition:</b> ndarray.h:387</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a4bbb80e8e36317829dd63e7f44ffbb0f"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a4bbb80e8e36317829dd63e7f44ffbb0f">tvm::runtime::NDArray::NDArray</a></div><div class="ttdeci">NDArray()</div><div class="ttdoc">default constructor </div><div class="ttdef"><b>Definition:</b> ndarray.h:52</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_af4d489a1208be9cc4248b592769bccf2"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#af4d489a1208be9cc4248b592769bccf2">tvm::runtime::NDArray::CopyFromBytes</a></div><div class="ttdeci">void CopyFromBytes(const void *data, size_t nbytes)</div><div class="ttdoc">Copy data content from a byte buffer. </div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a87268f39e0b14b18fee24d30c0f0ad95"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a87268f39e0b14b18fee24d30c0f0ad95">tvm::runtime::NDArray::get_mutable</a></div><div class="ttdeci">Container * get_mutable() const </div><div class="ttdoc">Get mutable internal container pointer. </div><div class="ttdef"><b>Definition:</b> ndarray.h:364</div></div>
-<div class="ttc" id="namespacetvm_1_1runtime_html_a8fb37910dcd9bb6899e6a3a47f006514"><div class="ttname"><a href="namespacetvm_1_1runtime.html#a8fb37910dcd9bb6899e6a3a47f006514">tvm::runtime::SaveDLTensor</a></div><div class="ttdeci">bool SaveDLTensor(dmlc::Stream *strm, const DLTensor *tensor)</div><div class="ttdoc">Save a DLTensor to stream. </div><div class="ttdef"><b>Definition:</b> ndarray.h:391</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a87268f39e0b14b18fee24d30c0f0ad95"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a87268f39e0b14b18fee24d30c0f0ad95">tvm::runtime::NDArray::get_mutable</a></div><div class="ttdeci">Container * get_mutable() const </div><div class="ttdoc">Get mutable internal container pointer. </div><div class="ttdef"><b>Definition:</b> ndarray.h:362</div></div>
+<div class="ttc" id="namespacetvm_1_1runtime_html_a8fb37910dcd9bb6899e6a3a47f006514"><div class="ttname"><a href="namespacetvm_1_1runtime.html#a8fb37910dcd9bb6899e6a3a47f006514">tvm::runtime::SaveDLTensor</a></div><div class="ttdeci">bool SaveDLTensor(dmlc::Stream *strm, const DLTensor *tensor)</div><div class="ttdoc">Save a DLTensor to stream. </div><div class="ttdef"><b>Definition:</b> ndarray.h:389</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_abec485628a0ca451b668c42fd8fa691a"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#abec485628a0ca451b668c42fd8fa691a">tvm::runtime::NDArray::FromDLPack</a></div><div class="ttdeci">static NDArray FromDLPack(DLManagedTensor *tensor)</div><div class="ttdoc">Create a NDArray backed by a dlpack tensor. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a2ca068cfab0b928e5809631c1bec64e7"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a2ca068cfab0b928e5809631c1bec64e7">tvm::runtime::NDArray::Empty</a></div><div class="ttdeci">static NDArray Empty(std::vector&lt; int64_t &gt; shape, DLDataType dtype, DLContext ctx)</div><div class="ttdoc">Create an empty NDArray. </div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html">tvm::runtime::TVMArgsSetter</a></div><div class="ttdef"><b>Definition:</b> packed_func.h:1080</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html">tvm::runtime::TVMArgsSetter</a></div><div class="ttdef"><b>Definition:</b> packed_func.h:1082</div></div>
 <div class="ttc" id="object_8h_html_ac2b7418e9549512b5db0126cf2a716f1"><div class="ttname"><a href="object_8h.html#ac2b7418e9549512b5db0126cf2a716f1">TVM_DECLARE_BASE_OBJECT_INFO</a></div><div class="ttdeci">#define TVM_DECLARE_BASE_OBJECT_INFO(TypeName, ParentType)</div><div class="ttdoc">helper macro to declare a base object type that can be inheritated. </div><div class="ttdef"><b>Definition:</b> object.h:635</div></div>
 <div class="ttc" id="c__runtime__api_8h_html"><div class="ttname"><a href="c__runtime__api_8h.html">c_runtime_api.h</a></div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_acb2fd06934aac23ba492e78619c7eaa5"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#acb2fd06934aac23ba492e78619c7eaa5">tvm::runtime::NDArray::use_count</a></div><div class="ttdeci">int use_count() const </div><div class="ttdef"><b>Definition:</b> ndarray.h:360</div></div>
-<div class="ttc" id="namespacetvm_1_1runtime_html_a59940b6d63dd4c5175c0fe875047c1cf"><div class="ttname"><a href="namespacetvm_1_1runtime.html#a59940b6d63dd4c5175c0fe875047c1cf">tvm::runtime::GetDataSize</a></div><div class="ttdeci">size_t GetDataSize(const DLTensor &amp;arr)</div><div class="ttdoc">return the size of data the DLTensor hold, in term of number of bytes </div><div class="ttdef"><b>Definition:</b> ndarray.h:300</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_acb2fd06934aac23ba492e78619c7eaa5"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#acb2fd06934aac23ba492e78619c7eaa5">tvm::runtime::NDArray::use_count</a></div><div class="ttdeci">int use_count() const </div><div class="ttdef"><b>Definition:</b> ndarray.h:358</div></div>
+<div class="ttc" id="namespacetvm_1_1runtime_html_a59940b6d63dd4c5175c0fe875047c1cf"><div class="ttname"><a href="namespacetvm_1_1runtime.html#a59940b6d63dd4c5175c0fe875047c1cf">tvm::runtime::GetDataSize</a></div><div class="ttdeci">size_t GetDataSize(const DLTensor &amp;arr)</div><div class="ttdoc">return the size of data the DLTensor hold, in term of number of bytes </div><div class="ttdef"><b>Definition:</b> ndarray.h:298</div></div>
 <div class="ttc" id="structtvm_1_1runtime_1_1TypeIndex_html_aed93c7318efc8052201d4c404b21a40da48232c4de1fa5119f58c3ba3fc88334c"><div class="ttname"><a href="structtvm_1_1runtime_1_1TypeIndex.html#aed93c7318efc8052201d4c404b21a40da48232c4de1fa5119f58c3ba3fc88334c">tvm::runtime::TypeIndex::kRuntimeNDArray</a></div><div class="ttdoc">runtime::NDArray. </div><div class="ttdef"><b>Definition:</b> object.h:64</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
diff --git a/docs/api/doxygen/object_8h_source.html b/docs/api/doxygen/object_8h_source.html
index d7989ea..9fcabe3 100644
--- a/docs/api/doxygen/object_8h_source.html
+++ b/docs/api/doxygen/object_8h_source.html
@@ -91,7 +91,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="contents">
 <a href="object_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more cont [...]
 <div class="ttc" id="structtvm_1_1runtime_1_1TypeIndex_html_aed93c7318efc8052201d4c404b21a40da4ac0fbbbd83cb6e789b821b8ae8556f3"><div class="ttname"><a href="structtvm_1_1runtime_1_1TypeIndex.html#aed93c7318efc8052201d4c404b21a40da4ac0fbbbd83cb6e789b821b8ae8556f3">tvm::runtime::TypeIndex::kRuntimeMap</a></div><div class="ttdoc">runtime::Map. </div><div class="ttdef"><b>Definition:</b> object.h:70</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:557</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:559</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_a594c0d786dbc567ff6774ab11b45958a"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#a594c0d786dbc567ff6774ab11b45958a">tvm::runtime::Object::type_index</a></div><div class="ttdeci">uint32_t type_index() const </div><div class="ttdef"><b>Definition:</b> object.h:171</div></div>
 <div class="ttc" id="structtvm_1_1runtime_1_1TypeIndex_html_aed93c7318efc8052201d4c404b21a40da4551e7a3e4de3461648733bdfdefdbc4"><div class="ttname"><a href="structtvm_1_1runtime_1_1TypeIndex.html#aed93c7318efc8052201d4c404b21a40da4551e7a3e4de3461648733bdfdefdbc4">tvm::runtime::TypeIndex::kStaticIndexEnd</a></div><div class="ttdef"><b>Definition:</b> object.h:74</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_ab7968feb6ad38ecaffc320e13819d826"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#ab7968feb6ad38ecaffc320e13819d826">tvm::runtime::Object::Object</a></div><div class="ttdeci">Object(const Object &amp;other)</div><div class="ttdef"><b>Definition:</b> object.h:239</div></div>
@@ -123,7 +123,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_a70fb5361147634605d6595bb89381f03"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#a70fb5361147634605d6595bb89381f03">tvm::runtime::Object::DecRef</a></div><div class="ttdeci">void DecRef()</div><div class="ttdoc">developer function, decrease reference counter. </div><div class="ttdef"><b>Definition:</b> object.h:773</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_ad94d79729ac85aa7c976e23d39066383"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#ad94d79729ac85aa7c976e23d39066383">tvm::runtime::Object::RuntimeTypeIndex</a></div><div class="ttdeci">static uint32_t RuntimeTypeIndex()</div><div class="ttdef"><b>Definition:</b> object.h:217</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a4142990fd1959abc95aa0cccca4f6cda"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a4142990fd1959abc95aa0cccca4f6cda">tvm::runtime::ObjectRef::unique</a></div><div class="ttdeci">bool unique() const </div><div class="ttdef"><b>Definition:</b> object.h:537</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMMovableArgValue___html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMMovableArgValue__.html">tvm::runtime::TVMMovableArgValue_</a></div><div class="ttdoc">Internal auxiliary struct for TypedPackedFunc to indicate a movable argument. </div><div class="ttdef"><b>Definition:</b> packed_func.h:523</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMMovableArgValue___html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMMovableArgValue__.html">tvm::runtime::TVMMovableArgValue_</a></div><div class="ttdoc">Internal auxiliary struct for TypedPackedFunc to indicate a movable argument. </div><div class="ttdef"><b>Definition:</b> packed_func.h:525</div></div>
 <div class="ttc" id="namespacetvm_1_1runtime_html_a46bcf7948293194c5b9c7db91da96381"><div class="ttname"><a href="namespacetvm_1_1runtime.html#a46bcf7948293194c5b9c7db91da96381">tvm::runtime::GetObjectPtr</a></div><div class="ttdeci">ObjectPtr&lt; BaseType &gt; GetObjectPtr(ObjectType *ptr)</div><div class="ttdoc">Get an object ptr type from a raw object ptr. </div></div>
 <div class="ttc" id="structtvm_1_1runtime_1_1ObjectPtrEqual_html_aa04994521b5d230cf11231db5a15b201"><div class="ttname"><a href="structtvm_1_1runtime_1_1ObjectPtrEqual.html#aa04994521b5d230cf11231db5a15b201">tvm::runtime::ObjectPtrEqual::operator()</a></div><div class="ttdeci">bool operator()(const ObjectRef &amp;a, const ObjectRef &amp;b) const </div><div class="ttdef"><b>Definition:</b> object.h:622</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_aa1612f69ea5b4225d4cda759cd517323"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#aa1612f69ea5b4225d4cda759cd517323">tvm::runtime::Object::Object</a></div><div class="ttdeci">Object(Object &amp;&amp;other)</div><div class="ttdef"><b>Definition:</b> object.h:241</div></div>
@@ -160,7 +160,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectPtr_html_a29eed1f62407d1ea7b28725ea620f0e4"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectPtr.html#a29eed1f62407d1ea7b28725ea620f0e4">tvm::runtime::ObjectPtr::ObjectPtr</a></div><div class="ttdeci">ObjectPtr()</div><div class="ttdoc">default constructor </div><div class="ttdef"><b>Definition:</b> object.h:353</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectPtr_html_a62e18d1b81152c6caf93961eca6d04bc"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectPtr.html#a62e18d1b81152c6caf93961eca6d04bc">tvm::runtime::ObjectPtr::ObjectPtr</a></div><div class="ttdeci">ObjectPtr(ObjectPtr&lt; Y &gt; &amp;&amp;other)</div><div class="ttdoc">move constructor </div><div class="ttdef"><b>Definition:</b> object.h:385</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectPtr_html_ac56d7fad8cbc348ad8ef0e7e23ee90e2"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectPtr.html#ac56d7fad8cbc348ad8ef0e7e23ee90e2">tvm::runtime::ObjectPtr::~ObjectPtr</a></div><div class="ttdeci">~ObjectPtr()</div><div class="ttdoc">destructor </div><div class="ttdef"><b>Definition:</b> object.h:392</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html">tvm::runtime::TVMArgsSetter</a></div><div class="ttdef"><b>Definition:</b> packed_func.h:1080</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html">tvm::runtime::TVMArgsSetter</a></div><div class="ttdef"><b>Definition:</b> packed_func.h:1082</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_a4bfc2586cb55f2af47728187b3256255"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#a4bfc2586cb55f2af47728187b3256255">tvm::runtime::Object::type_index_</a></div><div class="ttdeci">uint32_t type_index_</div><div class="ttdoc">Type index(tag) that indicates the type of the object. </div><div class="ttdef"><b>Definition:</b> object.h:253</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_ae341e561272ff43cdcbc927bc29ac50d"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#ae341e561272ff43cdcbc927bc29ac50d">tvm::runtime::Object::operator=</a></div><div class="ttdeci">Object &amp; operator=(Object &amp;&amp;other)</div><div class="ttdef"><b>Definition:</b> object.h:246</div></div>
 <div class="ttc" id="classtvm_1_1RelayRefType_html"><div class="ttname"><a href="classtvm_1_1RelayRefType.html">tvm::RelayRefType</a></div><div class="ttdoc">Managed reference to RelayRefTypeNode. </div><div class="ttdef"><b>Definition:</b> type.h:537</div></div>
diff --git a/docs/api/doxygen/packed__func_8h.html b/docs/api/doxygen/packed__func_8h.html
index 8734f82..3165bd0 100644
--- a/docs/api/doxygen/packed__func_8h.html
+++ b/docs/api/doxygen/packed__func_8h.html
@@ -258,7 +258,7 @@ Functions</h2></td></tr>
         </tr>
       </table>
 </div><div class="memdoc">
-<b>Value:</b><div class="fragment"><div class="line"><span class="keyword">extern</span> <span class="stringliteral">&quot;C&quot;</span> {                                                                              \</div><div class="line">  TVM_DLL <span class="keywordtype">int</span> ExportName(<a class="code" href="unionTVMValue.html">TVMValue</a>* args, <span class="keywordtype">int</span>* type_code, <span class="keywordtype">int</span> num_args, <a class="code" href="unionTVMValu [...]
+<b>Value:</b><div class="fragment"><div class="line"><span class="keyword">extern</span> <span class="stringliteral">&quot;C&quot;</span> {                                                                              \</div><div class="line">  TVM_DLL <span class="keywordtype">int</span> ExportName(<a class="code" href="unionTVMValue.html">TVMValue</a>* args, <span class="keywordtype">int</span>* type_code, <span class="keywordtype">int</span> num_args, <a class="code" href="unionTVMValu [...]
 <div class="ttc" id="c__runtime__api_8h_html_a477111f9accd70633dc5f2e7139b6cf4"><div class="ttname"><a href="c__runtime__api_8h.html#a477111f9accd70633dc5f2e7139b6cf4">TVMAPISetLastError</a></div><div class="ttdeci">void TVMAPISetLastError(const char *msg)</div><div class="ttdoc">Used for implementing C API function. Set last error message before return. </div></div>
 <div class="ttc" id="unionTVMValue_html"><div class="ttname"><a href="unionTVMValue.html">TVMValue</a></div><div class="ttdoc">Union type of values being passed through API and function calls. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:144</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html">tvm::runtime::TVMArgs</a></div><div class="ttdoc">Arguments into TVM functions. </div><div class="ttdef"><b>Definition:</b> packed_func.h:308</div></div>
@@ -298,7 +298,7 @@ Functions</h2></td></tr>
         </tr>
       </table>
 </div><div class="memdoc">
-<b>Value:</b><div class="fragment"><div class="line"><span class="keyword">extern</span> <span class="stringliteral">&quot;C&quot;</span> {                                                                              \</div><div class="line">  TVM_DLL <span class="keywordtype">int</span> ExportName(<a class="code" href="unionTVMValue.html">TVMValue</a>* args, <span class="keywordtype">int</span>* type_code, <span class="keywordtype">int</span> num_args, <a class="code" href="unionTVMValu [...]
+<b>Value:</b><div class="fragment"><div class="line"><span class="keyword">extern</span> <span class="stringliteral">&quot;C&quot;</span> {                                                                              \</div><div class="line">  TVM_DLL <span class="keywordtype">int</span> ExportName(<a class="code" href="unionTVMValue.html">TVMValue</a>* args, <span class="keywordtype">int</span>* type_code, <span class="keywordtype">int</span> num_args, <a class="code" href="unionTVMValu [...]
 <div class="ttc" id="c__runtime__api_8h_html_a477111f9accd70633dc5f2e7139b6cf4"><div class="ttname"><a href="c__runtime__api_8h.html#a477111f9accd70633dc5f2e7139b6cf4">TVMAPISetLastError</a></div><div class="ttdeci">void TVMAPISetLastError(const char *msg)</div><div class="ttdoc">Used for implementing C API function. Set last error message before return. </div></div>
 <div class="ttc" id="unionTVMValue_html"><div class="ttname"><a href="unionTVMValue.html">TVMValue</a></div><div class="ttdoc">Union type of values being passed through API and function calls. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:144</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html">tvm::runtime::TVMArgs</a></div><div class="ttdoc">Arguments into TVM functions. </div><div class="ttdef"><b>Definition:</b> packed_func.h:308</div></div>
diff --git a/docs/api/doxygen/packed__func_8h_source.html b/docs/api/doxygen/packed__func_8h_source.html
index 8ce72c0..57d7b97 100644
--- a/docs/api/doxygen/packed__func_8h_source.html
+++ b/docs/api/doxygen/packed__func_8h_source.html
@@ -89,29 +89,29 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">packed_func.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="packed__func_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or mor [...]
-<div class="ttc" id="namespacetvm_1_1runtime_html_a129050a60cebb0bbe18f96b41a36a948"><div class="ttname"><a href="namespacetvm_1_1runtime.html#a129050a60cebb0bbe18f96b41a36a948">tvm::runtime::ArgTypeCode2Str</a></div><div class="ttdeci">const char * ArgTypeCode2Str(int type_code)</div><div class="ttdoc">Convert argument type code to string. </div><div class="ttdef"><b>Definition:</b> packed_func.h:979</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_ab764d801efc097fe2d09c4e9935cc581"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#ab764d801efc097fe2d09c4e9935cc581">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const std::string &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1130</div></div>
+<a href="packed__func_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or mor [...]
+<div class="ttc" id="namespacetvm_1_1runtime_html_a129050a60cebb0bbe18f96b41a36a948"><div class="ttname"><a href="namespacetvm_1_1runtime.html#a129050a60cebb0bbe18f96b41a36a948">tvm::runtime::ArgTypeCode2Str</a></div><div class="ttdeci">const char * ArgTypeCode2Str(int type_code)</div><div class="ttdoc">Convert argument type code to string. </div><div class="ttdef"><b>Definition:</b> packed_func.h:981</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_ab764d801efc097fe2d09c4e9935cc581"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#ab764d801efc097fe2d09c4e9935cc581">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const std::string &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1132</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html_afec79c909bdf103a01cdb4732c0afdf3"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html#afec79c909bdf103a01cdb4732c0afdf3">tvm::runtime::TVMArgs::num_args</a></div><div class="ttdeci">int num_args</div><div class="ttdef"><b>Definition:</b> packed_func.h:312</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgValue_html_a987b2fb283cea5484d4655e3f711c046"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgValue.html#a987b2fb283cea5484d4655e3f711c046">tvm::runtime::TVMArgValue::TVMArgValue</a></div><div class="ttdeci">TVMArgValue()</div><div class="ttdoc">default constructor </div><div class="ttdef"><b>Definition:</b> packed_func.h:463</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_aba4ec0ffadb2adc12cc5107cfab6374b"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#aba4ec0ffadb2adc12cc5107cfab6374b">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, TVMContext value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1114</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a952f62623ec147f366363b664d749566"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a952f62623ec147f366363b664d749566">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(std::string value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:662</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:557</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1Module_html_a1233f7b896bb299ef07f9e41a4ffdc17"><div class="ttname"><a href="classtvm_1_1runtime_1_1Module.html#a1233f7b896bb299ef07f9e41a4ffdc17">tvm::runtime::Module::GetFunction</a></div><div class="ttdeci">PackedFunc GetFunction(const std::string &amp;name, bool query_imports=false)</div><div class="ttdoc">Get packed function from current module by name. </div><div class="ttdef"><b>Definition:</b> packed_func.h:1459</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ade0e2757904f4f5ba5c667ae01793a47"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ade0e2757904f4f5ba5c667ae01793a47">tvm::runtime::NDArray::FFIDecRef</a></div><div class="ttdeci">static void FFIDecRef(TVMArrayHandle handle)</div><div class="ttdoc">DecRef resource managed by an FFI array handle. </div><div class="ttdef"><b>Definition:</b> ndarray.h:380</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ae47baae854e2ff66d0ef87178727d8f4"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ae47baae854e2ff66d0ef87178727d8f4">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(TVMMovableArgValue_ &amp;&amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:705</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a74ea2767d491c57cb9c71e26ee934344"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a74ea2767d491c57cb9c71e26ee934344">tvm::runtime::TVMRetValue::MoveToCHost</a></div><div class="ttdeci">void MoveToCHost(TVMValue *ret_value, int *ret_type_code)</div><div class="ttdoc">Move the value back to front-end via C API. This marks the current container as null. The managed resources are moved to the front-end. The fr [...]
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_aba4ec0ffadb2adc12cc5107cfab6374b"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#aba4ec0ffadb2adc12cc5107cfab6374b">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, TVMContext value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1116</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a952f62623ec147f366363b664d749566"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a952f62623ec147f366363b664d749566">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(std::string value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:664</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:559</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1Module_html_a1233f7b896bb299ef07f9e41a4ffdc17"><div class="ttname"><a href="classtvm_1_1runtime_1_1Module.html#a1233f7b896bb299ef07f9e41a4ffdc17">tvm::runtime::Module::GetFunction</a></div><div class="ttdeci">PackedFunc GetFunction(const std::string &amp;name, bool query_imports=false)</div><div class="ttdoc">Get packed function from current module by name. </div><div class="ttdef"><b>Definition:</b> packed_func.h:1461</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ade0e2757904f4f5ba5c667ae01793a47"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ade0e2757904f4f5ba5c667ae01793a47">tvm::runtime::NDArray::FFIDecRef</a></div><div class="ttdeci">static void FFIDecRef(TVMArrayHandle handle)</div><div class="ttdoc">DecRef resource managed by an FFI array handle. </div><div class="ttdef"><b>Definition:</b> ndarray.h:378</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ae47baae854e2ff66d0ef87178727d8f4"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ae47baae854e2ff66d0ef87178727d8f4">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(TVMMovableArgValue_ &amp;&amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:707</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a74ea2767d491c57cb9c71e26ee934344"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a74ea2767d491c57cb9c71e26ee934344">tvm::runtime::TVMRetValue::MoveToCHost</a></div><div class="ttdeci">void MoveToCHost(TVMValue *ret_value, int *ret_type_code)</div><div class="ttdoc">Move the value back to front-end via C API. This marks the current container as null. The managed resources are moved to the front-end. The fr [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectPtr_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectPtr.html">tvm::runtime::ObjectPtr</a></div><div class="ttdoc">A custom smart pointer for Object. </div><div class="ttdef"><b>Definition:</b> object.h:350</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html_a5d2aeac7387e436e6e19d7a6dfe91307"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html#a5d2aeac7387e436e6e19d7a6dfe91307">tvm::runtime::TVMArgs::TVMArgs</a></div><div class="ttdeci">TVMArgs(const TVMValue *values, const int *type_codes, int num_args)</div><div class="ttdoc">constructor </div><div class="ttdef"><b>Definition:</b> packed_func.h:319</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html">tvm::runtime::TypedPackedFunc&lt; R(Args...)&gt;</a></div><div class="ttdoc">A PackedFunc wrapper to provide typed function signature. It is backed by a PackedFunc internally...</div><div class="ttdef"><b>Definition:</b> packed_func.h:178</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793a5f6f9decf7207118e282b20a874e84e7"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793a5f6f9decf7207118e282b20a874e84e7">kTVMContext</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:111</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a918b5a9ef8fcd65d649c7dd41ff12d9f"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a918b5a9ef8fcd65d649c7dd41ff12d9f">tvm::runtime::TVMPODValue_::AsObjectRef</a></div><div class="ttdeci">TObjectRef AsObjectRef() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1358</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a8c90a03541fb6c3f068cc8fccf83762a"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a8c90a03541fb6c3f068cc8fccf83762a">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const TypedPackedFunc&lt; FType &gt; &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1148</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a510ec87e6718c0e6197e7321ca8774f2"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a510ec87e6718c0e6197e7321ca8774f2">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(bool value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:657</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a918b5a9ef8fcd65d649c7dd41ff12d9f"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a918b5a9ef8fcd65d649c7dd41ff12d9f">tvm::runtime::TVMPODValue_::AsObjectRef</a></div><div class="ttdeci">TObjectRef AsObjectRef() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1360</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a8c90a03541fb6c3f068cc8fccf83762a"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a8c90a03541fb6c3f068cc8fccf83762a">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const TypedPackedFunc&lt; FType &gt; &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1150</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a510ec87e6718c0e6197e7321ca8774f2"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a510ec87e6718c0e6197e7321ca8774f2">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(bool value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:659</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_aed3e983e990c5c4ed3ac09b95055297e"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#aed3e983e990c5c4ed3ac09b95055297e">tvm::runtime::TVMPODValue_::type_code_</a></div><div class="ttdeci">int type_code_</div><div class="ttdoc">the type code </div><div class="ttdef"><b>Definition:</b> packed_func.h:451</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a5a799e4197f227549cd641b0e753f9b8"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a5a799e4197f227549cd641b0e753f9b8">tvm::runtime::TVMPODValue_::type_code</a></div><div class="ttdeci">int type_code() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:425</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html">tvm::runtime::TVMPODValue_</a></div><div class="ttdoc">Internal base class to handle conversion to POD values. </div><div class="ttdef"><b>Definition:</b> packed_func.h:363</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html_aecd3c16855218ccac5682b6d2864e928"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html#aecd3c16855218ccac5682b6d2864e928">tvm::runtime::TVMArgs::operator[]</a></div><div class="ttdeci">TVMArgValue operator[](int i) const </div><div class="ttdoc">Get i-th argument. </div><div class="ttdef"><b>Definition:</b> packed_func.h:966</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html_aecd3c16855218ccac5682b6d2864e928"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html#aecd3c16855218ccac5682b6d2864e928">tvm::runtime::TVMArgs::operator[]</a></div><div class="ttdeci">TVMArgValue operator[](int i) const </div><div class="ttdoc">Get i-th argument. </div><div class="ttdef"><b>Definition:</b> packed_func.h:968</div></div>
 <div class="ttc" id="namespacetvm_1_1runtime_html_ac360f2d9815036ab1b4d84d9cfb46b4f"><div class="ttname"><a href="namespacetvm_1_1runtime.html#ac360f2d9815036ab1b4d84d9cfb46b4f">tvm::runtime::DLDataType2String</a></div><div class="ttdeci">std::string DLDataType2String(DLDataType t)</div><div class="ttdoc">convert a TVM type to string. </div><div class="ttdef"><b>Definition:</b> data_type.h:325</div></div>
 <div class="ttc" id="runtime_2container_8h_html"><div class="ttname"><a href="runtime_2container_8h.html">container.h</a></div><div class="ttdoc">Common POD(plain old data) container types. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_af1359ebff2c328ff0bb80083937c95dd"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#af1359ebff2c328ff0bb80083937c95dd">tvm::runtime::Object::IsInstance</a></div><div class="ttdeci">bool IsInstance() const </div><div class="ttdef"><b>Definition:</b> object.h:801</div></div>
@@ -121,14 +121,14 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="crt_2packed__func_8h_html_ad869d7c5618f982f6841399c216a234c"><div class="ttname"><a href="crt_2packed__func_8h.html#ad869d7c5618f982f6841399c216a234c">TVMArgs</a></div><div class="ttdeci">struct TVMArgs TVMArgs</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4_html_ac0ad0eb56ab0f12d91adb50dab38ddab"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html#ac0ad0eb56ab0f12d91adb50dab38ddab">tvm::runtime::TypedPackedFunc&lt; R(Args...)&gt;::packed</a></div><div class="ttdeci">const PackedFunc &amp; packed() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:286</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793a41343a05e42d08053d9fa1da85f70af6"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793a41343a05e42d08053d9fa1da85f70af6">kTVMModuleHandle</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:114</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a0fbb9197b832b52e3da2fb37643199d9"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a0fbb9197b832b52e3da2fb37643199d9">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const TVMRetValue &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1151</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMMovableArgValue___html_a8eca9048535541f374a5806f9648131b"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMMovableArgValue__.html#a8eca9048535541f374a5806f9648131b">tvm::runtime::TVMMovableArgValue_::TVMMovableArgValue_</a></div><div class="ttdeci">TVMMovableArgValue_(TVMValue value, int type_code)</div><div class="ttdef"><b>Definition:</b> packed_func.h:525</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a063b7950d32288a967011949a7a70dc8"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a063b7950d32288a967011949a7a70dc8">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(TVMContext value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:646</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a16925c8479ab919bcd179692b4f14a24"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a16925c8479ab919bcd179692b4f14a24">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, DLDataType value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1118</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html_a694bef845554c39844680babd68f35de"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html#a694bef845554c39844680babd68f35de">tvm::runtime::TVMArgs::size</a></div><div class="ttdeci">int size() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:972</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a0eeb2af3fa21cebfdc2bcf04b2fbb1f6"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a0eeb2af3fa21cebfdc2bcf04b2fbb1f6">tvm::runtime::TVMRetValue::~TVMRetValue</a></div><div class="ttdeci">~TVMRetValue()</div><div class="ttdoc">destructor </div><div class="ttdef"><b>Definition:</b> packed_func.h:570</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a0fbb9197b832b52e3da2fb37643199d9"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a0fbb9197b832b52e3da2fb37643199d9">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const TVMRetValue &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1153</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMMovableArgValue___html_a8eca9048535541f374a5806f9648131b"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMMovableArgValue__.html#a8eca9048535541f374a5806f9648131b">tvm::runtime::TVMMovableArgValue_::TVMMovableArgValue_</a></div><div class="ttdeci">TVMMovableArgValue_(TVMValue value, int type_code)</div><div class="ttdef"><b>Definition:</b> packed_func.h:527</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a063b7950d32288a967011949a7a70dc8"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a063b7950d32288a967011949a7a70dc8">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(TVMContext value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:648</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a16925c8479ab919bcd179692b4f14a24"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a16925c8479ab919bcd179692b4f14a24">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, DLDataType value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1120</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html_a694bef845554c39844680babd68f35de"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html#a694bef845554c39844680babd68f35de">tvm::runtime::TVMArgs::size</a></div><div class="ttdeci">int size() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:974</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a0eeb2af3fa21cebfdc2bcf04b2fbb1f6"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a0eeb2af3fa21cebfdc2bcf04b2fbb1f6">tvm::runtime::TVMRetValue::~TVMRetValue</a></div><div class="ttdeci">~TVMRetValue()</div><div class="ttdoc">destructor </div><div class="ttdef"><b>Definition:</b> packed_func.h:572</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793a6ee930a6be374837d4d85dbe1abeb842"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793a6ee930a6be374837d4d85dbe1abeb842">kTVMDataType</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:110</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a0eff4c00e945424c0a2ca270f627aca4"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a0eff4c00e945424c0a2ca270f627aca4">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const PackedFunc &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1138</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a0eff4c00e945424c0a2ca270f627aca4"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a0eff4c00e945424c0a2ca270f627aca4">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const PackedFunc &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1140</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html_a3b99059e2f1ad08c99b42b5bee82752f"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html#a3b99059e2f1ad08c99b42b5bee82752f">tvm::runtime::TVMArgs::values</a></div><div class="ttdeci">const TVMValue * values</div><div class="ttdef"><b>Definition:</b> packed_func.h:310</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793a396e8ab64a9b33d46bcbedb84819f194"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793a396e8ab64a9b33d46bcbedb84819f194">kTVMPackedFuncHandle</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:115</div></div>
 <div class="ttc" id="structTVMByteArray_html_a86d8e8341ce407b7d9374d887143e476"><div class="ttname"><a href="structTVMByteArray.html#a86d8e8341ce407b7d9374d887143e476">TVMByteArray::size</a></div><div class="ttdeci">size_t size</div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:159</div></div>
@@ -136,112 +136,112 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="structtvm_1_1runtime_1_1ObjectTypeChecker_html_a3498eb545b33e1c23a417fa58ec51dd6"><div class="ttname"><a href="structtvm_1_1runtime_1_1ObjectTypeChecker.html#a3498eb545b33e1c23a417fa58ec51dd6">tvm::runtime::ObjectTypeChecker::TypeName</a></div><div class="ttdeci">static std::string TypeName()</div><div class="ttdef"><b>Definition:</b> packed_func.h:353</div></div>
 <div class="ttc" id="structtvm_1_1runtime_1_1ObjectTypeChecker_html_aba05348357ac739ea2d3deecc7a1b202"><div class="ttname"><a href="structtvm_1_1runtime_1_1ObjectTypeChecker.html#aba05348357ac739ea2d3deecc7a1b202">tvm::runtime::ObjectTypeChecker::Check</a></div><div class="ttdeci">static bool Check(const Object *ptr)</div><div class="ttdef"><b>Definition:</b> packed_func.h:348</div></div>
 <div class="ttc" id="unionTVMValue_html"><div class="ttname"><a href="unionTVMValue.html">TVMValue</a></div><div class="ttdoc">Union type of values being passed through API and function calls. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:144</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a1869ead078ccedb854a4ef245c4c0b2c"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a1869ead078ccedb854a4ef245c4c0b2c">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, void *value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1106</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a1869ead078ccedb854a4ef245c4c0b2c"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a1869ead078ccedb854a4ef245c4c0b2c">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, void *value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1108</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_a66c99486332437e0459429dd00db2710"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#a66c99486332437e0459429dd00db2710">tvm::runtime::PackedFunc::PackedFunc</a></div><div class="ttdeci">PackedFunc(FType body)</div><div class="ttdoc">constructing a packed function from a std::function. </div><div class="ttdef"><b>Definition:</b> packed_func.h:104</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></div><div class="ttdoc">base class of all object containers. </div><div class="ttdef"><b>Definition:</b> object.h:163</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793abe76912731a3c65bd7cdd9ab9a462c66"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793abe76912731a3c65bd7cdd9ab9a462c66">kTVMObjectHandle</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:113</div></div>
 <div class="ttc" id="structTVMByteArray_html_ab124e3227a75e0e4d55452f675f4fde1"><div class="ttname"><a href="structTVMByteArray.html#ab124e3227a75e0e4d55452f675f4fde1">TVMByteArray::data</a></div><div class="ttdeci">const char * data</div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:158</div></div>
-<div class="ttc" id="namespacetvm_1_1runtime_html_afdd7050eda88b079f0a962bd413a34ea"><div class="ttname"><a href="namespacetvm_1_1runtime.html#afdd7050eda88b079f0a962bd413a34ea">tvm::runtime::TVMArrayHandleToObjectHandle</a></div><div class="ttdeci">Object * TVMArrayHandleToObjectHandle(TVMArrayHandle handle)</div><div class="ttdef"><b>Definition:</b> ndarray.h:384</div></div>
+<div class="ttc" id="namespacetvm_1_1runtime_html_afdd7050eda88b079f0a962bd413a34ea"><div class="ttname"><a href="namespacetvm_1_1runtime.html#afdd7050eda88b079f0a962bd413a34ea">tvm::runtime::TVMArrayHandleToObjectHandle</a></div><div class="ttdeci">Object * TVMArrayHandleToObjectHandle(TVMArrayHandle handle)</div><div class="ttdef"><b>Definition:</b> ndarray.h:382</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a8ffdfcc7099faf19ee07a5c03ce06af8"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a8ffdfcc7099faf19ee07a5c03ce06af8">tvm::runtime::TVMPODValue_::value_</a></div><div class="ttdeci">TVMValue value_</div><div class="ttdoc">The value. </div><div class="ttdef"><b>Definition:</b> packed_func.h:449</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a77455a8fe7d27b90a01a64f1cd28e9ec"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a77455a8fe7d27b90a01a64f1cd28e9ec">tvm::runtime::TVMRetValue::TVMRetValue</a></div><div class="ttdeci">TVMRetValue()</div><div class="ttdoc">default constructor </div><div class="ttdef"><b>Definition:</b> packed_func.h:560</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a77455a8fe7d27b90a01a64f1cd28e9ec"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a77455a8fe7d27b90a01a64f1cd28e9ec">tvm::runtime::TVMRetValue::TVMRetValue</a></div><div class="ttdeci">TVMRetValue()</div><div class="ttdoc">default constructor </div><div class="ttdef"><b>Definition:</b> packed_func.h:562</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html">tvm::runtime::NDArray</a></div><div class="ttdoc">Managed NDArray. The array is backed by reference counted blocks. </div><div class="ttdef"><b>Definition:</b> ndarray.h:43</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a75696cb65d294217796fbc4f1c22d7f8"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a75696cb65d294217796fbc4f1c22d7f8">tvm::runtime::TVMRetValue::value</a></div><div class="ttdeci">const TVMValue &amp; value() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:741</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a5d251ed32af617354c8622689a6d6e48"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a5d251ed32af617354c8622689a6d6e48">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(int value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:641</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a75696cb65d294217796fbc4f1c22d7f8"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a75696cb65d294217796fbc4f1c22d7f8">tvm::runtime::TVMRetValue::value</a></div><div class="ttdeci">const TVMValue &amp; value() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:743</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a5d251ed32af617354c8622689a6d6e48"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a5d251ed32af617354c8622689a6d6e48">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(int value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:643</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a22e5bb9d64dbc773bb9263b70882239e"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a22e5bb9d64dbc773bb9263b70882239e">tvm::runtime::ObjectRef::FFIClearAfterMove</a></div><div class="ttdeci">static void FFIClearAfterMove(ObjectRef *ref)</div><div class="ttdoc">Clear the object ref data field without DecRef after we successfully moved the field. </div><div class="ttdef"><b>Definition:</b> object.h:579</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a4993d4b338c28096b56ed3d0d9ae5170"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a4993d4b338c28096b56ed3d0d9ae5170">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(TVMRetValue &amp;&amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:614</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a4993d4b338c28096b56ed3d0d9ae5170"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a4993d4b338c28096b56ed3d0d9ae5170">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(TVMRetValue &amp;&amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:616</div></div>
 <div class="ttc" id="structTVMByteArray_html"><div class="ttname"><a href="structTVMByteArray.html">TVMByteArray</a></div><div class="ttdoc">Byte array type used to pass in byte array When kTVMBytes is used as data type. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:157</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_afd42f6574de11e3b62d40509a94f4f9f"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#afd42f6574de11e3b62d40509a94f4f9f">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(TVMByteArray value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:666</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_afd42f6574de11e3b62d40509a94f4f9f"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#afd42f6574de11e3b62d40509a94f4f9f">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(TVMByteArray value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:668</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4_html_a36ca0d1876544463ee848766e70e5e96"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html#a36ca0d1876544463ee848766e70e5e96">tvm::runtime::TypedPackedFunc&lt; R(Args...)&gt;::TypedPackedFunc</a></div><div class="ttdeci">TypedPackedFunc(const FLambda &amp;typed_lambda)</div><div class="ttdoc">construct from a lambda function with the same signature. </div><div [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_aafc5d18f0ac508fdd8a90b5af9bc1eff"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#aafc5d18f0ac508fdd8a90b5af9bc1eff">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(const TVMArgValue &amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:701</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_aafc5d18f0ac508fdd8a90b5af9bc1eff"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#aafc5d18f0ac508fdd8a90b5af9bc1eff">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(const TVMArgValue &amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:703</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_ac04ed1678799ef2f0d22204edddcdb41"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#ac04ed1678799ef2f0d22204edddcdb41">tvm::runtime::PackedFunc::PackedFunc</a></div><div class="ttdeci">PackedFunc()</div><div class="ttdoc">default constructor </div><div class="ttdef"><b>Definition:</b> packed_func.h:97</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_abf351bdd97209c8fb656b60abce7fccb"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#abf351bdd97209c8fb656b60abce7fccb">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(double value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:621</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_abf351bdd97209c8fb656b60abce7fccb"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#abf351bdd97209c8fb656b60abce7fccb">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(double value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:623</div></div>
 <div class="ttc" id="structTVMArgs_html"><div class="ttname"><a href="structTVMArgs.html">TVMArgs</a></div><div class="ttdef"><b>Definition:</b> packed_func.h:38</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMMovableArgValue___html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMMovableArgValue__.html">tvm::runtime::TVMMovableArgValue_</a></div><div class="ttdoc">Internal auxiliary struct for TypedPackedFunc to indicate a movable argument. </div><div class="ttdef"><b>Definition:</b> packed_func.h:523</div></div>
-<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4_html_a621ed59fef92109e666150923621379c"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4.html#a621ed59fef92109e666150923621379c">tvm::runtime::PackedFuncValueConverter&lt; Optional&lt; T &gt; &gt;::From</a></div><div class="ttdeci">static Optional&lt; T &gt; From(const TVMRetValue &amp;val)</div><div class="ttdef"><b>Definition:</b>  [...]
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMMovableArgValue___html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMMovableArgValue__.html">tvm::runtime::TVMMovableArgValue_</a></div><div class="ttdoc">Internal auxiliary struct for TypedPackedFunc to indicate a movable argument. </div><div class="ttdef"><b>Definition:</b> packed_func.h:525</div></div>
+<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4_html_a621ed59fef92109e666150923621379c"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4.html#a621ed59fef92109e666150923621379c">tvm::runtime::PackedFuncValueConverter&lt; Optional&lt; T &gt; &gt;::From</a></div><div class="ttdeci">static Optional&lt; T &gt; From(const TVMRetValue &amp;val)</div><div class="ttdef"><b>Definition:</b>  [...]
 <div class="ttc" id="namespacetvm_html_a9da780393e228969f77aa7550520a582"><div class="ttname"><a href="namespacetvm.html#a9da780393e228969f77aa7550520a582">tvm::min</a></div><div class="ttdeci">PrimExpr min(PrimExpr a, PrimExpr b)</div><div class="ttdoc">take minimum of two values </div></div>
 <div class="ttc" id="ndarray_8h_html"><div class="ttname"><a href="ndarray_8h.html">ndarray.h</a></div><div class="ttdoc">A device-independent managed NDArray abstraction. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4_html_afa9327d129526d40c966fafc35255cad"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html#afa9327d129526d40c966fafc35255cad">tvm::runtime::TypedPackedFunc&lt; R(Args...)&gt;::operator==</a></div><div class="ttdeci">bool operator==(std::nullptr_t null) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:288</div></div>
 <div class="ttc" id="namespacetvm_1_1runtime_html_a277f104e659f71cd8885744700016341"><div class="ttname"><a href="namespacetvm_1_1runtime.html#a277f104e659f71cd8885744700016341">tvm::runtime::String2DLDataType</a></div><div class="ttdeci">DLDataType String2DLDataType(std::string s)</div><div class="ttdoc">convert a string to TVM type. </div><div class="ttdef"><b>Definition:</b> data_type.h:332</div></div>
-<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runtime_1_1String_01_4_html_a9ac48d52f86dc3718590acc119e88741"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runtime_1_1String_01_4.html#a9ac48d52f86dc3718590acc119e88741">tvm::runtime::PackedFuncValueConverter&lt;::tvm::runtime::String &gt;::From</a></div><div class="ttdeci">static String From(const TVMRetValue &amp;val)</div><div class="ttdef"><b>Definition:</b> [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a35455daf8c507edabf726c30de60fbed"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a35455daf8c507edabf726c30de60fbed">tvm::runtime::TVMPODValue_::IsObjectRef</a></div><div class="ttdeci">bool IsObjectRef() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1333</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ab76ba9c638e1d6db8d6f0ba8c4d38670"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ab76ba9c638e1d6db8d6f0ba8c4d38670">tvm::runtime::NDArray::FFIDataFromHandle</a></div><div class="ttdeci">static ObjectPtr&lt; Object &gt; FFIDataFromHandle(TVMArrayHandle handle)</div><div class="ttdoc">Construct NDArray&amp;#39;s Data field from array handle in FFI. </div><div class="ttdef"><b>Definition:</b> ndarray.h:368</div></div>
+<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runtime_1_1String_01_4_html_a9ac48d52f86dc3718590acc119e88741"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runtime_1_1String_01_4.html#a9ac48d52f86dc3718590acc119e88741">tvm::runtime::PackedFuncValueConverter&lt;::tvm::runtime::String &gt;::From</a></div><div class="ttdeci">static String From(const TVMRetValue &amp;val)</div><div class="ttdef"><b>Definition:</b> [...]
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a35455daf8c507edabf726c30de60fbed"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a35455daf8c507edabf726c30de60fbed">tvm::runtime::TVMPODValue_::IsObjectRef</a></div><div class="ttdeci">bool IsObjectRef() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1335</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_ab76ba9c638e1d6db8d6f0ba8c4d38670"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#ab76ba9c638e1d6db8d6f0ba8c4d38670">tvm::runtime::NDArray::FFIDataFromHandle</a></div><div class="ttdeci">static ObjectPtr&lt; Object &gt; FFIDataFromHandle(TVMArrayHandle handle)</div><div class="ttdoc">Construct NDArray&amp;#39;s Data field from array handle in FFI. </div><div class="ttdef"><b>Definition:</b> ndarray.h:366</div></div>
 <div class="ttc" id="structtvm_1_1runtime_1_1ObjectTypeChecker_html"><div class="ttname"><a href="structtvm_1_1runtime_1_1ObjectTypeChecker.html">tvm::runtime::ObjectTypeChecker</a></div><div class="ttdoc">Type traits for runtime type check during FFI conversion. </div><div class="ttdef"><b>Definition:</b> packed_func.h:347</div></div>
-<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4_html_a6748e04a16945df4c15edb53d0aaba70"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4.html#a6748e04a16945df4c15edb53d0aaba70">tvm::runtime::PackedFuncValueConverter&lt; Optional&lt; T &gt; &gt;::From</a></div><div class="ttdeci">static Optional&lt; T &gt; From(const TVMArgValue &amp;val)</div><div class="ttdef"><b>Definition:</b>  [...]
+<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4_html_a6748e04a16945df4c15edb53d0aaba70"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4.html#a6748e04a16945df4c15edb53d0aaba70">tvm::runtime::PackedFuncValueConverter&lt; Optional&lt; T &gt; &gt;::From</a></div><div class="ttdeci">static Optional&lt; T &gt; From(const TVMArgValue &amp;val)</div><div class="ttdef"><b>Definition:</b>  [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html">tvm::runtime::DataType</a></div><div class="ttdoc">Runtime primitive data type. </div><div class="ttdef"><b>Definition:</b> data_type.h:41</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html">tvm::runtime::TVMArgs</a></div><div class="ttdoc">Arguments into TVM functions. </div><div class="ttdef"><b>Definition:</b> packed_func.h:308</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ac4a3850c0989e7c2d5cd8e0f096d0997"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ac4a3850c0989e7c2d5cd8e0f096d0997">tvm::runtime::TVMRetValue::TVMRetValue</a></div><div class="ttdeci">TVMRetValue(TVMRetValue &amp;&amp;other)</div><div class="ttdoc">move constructor from anoter return value. </div><div class="ttdef"><b>Definition:</b> packed_func.h:565</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ac4a3850c0989e7c2d5cd8e0f096d0997"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ac4a3850c0989e7c2d5cd8e0f096d0997">tvm::runtime::TVMRetValue::TVMRetValue</a></div><div class="ttdeci">TVMRetValue(TVMRetValue &amp;&amp;other)</div><div class="ttdoc">move constructor from anoter return value. </div><div class="ttdef"><b>Definition:</b> packed_func.h:567</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4_html_a8dd1fbae84cb9597c52977b0e8db64dc"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html#a8dd1fbae84cb9597c52977b0e8db64dc">tvm::runtime::TypedPackedFunc&lt; R(Args...)&gt;::operator=</a></div><div class="ttdeci">TSelf &amp; operator=(FLambda typed_lambda)</div><div class="ttdoc">copy assignment operator from typed lambda </div><div class="ttdef"><b>Definiti [...]
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793a3bab8c80e57b1ab2d7ab95962be4d2ec"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793a3bab8c80e57b1ab2d7ab95962be4d2ec">kTVMNDArrayHandle</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:118</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a24a691e6243daeb46554eb692e5242ac"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a24a691e6243daeb46554eb692e5242ac">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, DataType dtype) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1122</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a24a691e6243daeb46554eb692e5242ac"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a24a691e6243daeb46554eb692e5242ac">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, DataType dtype) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1124</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4_html_aa590b3e712e06867805b41aaf17019ed"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html#aa590b3e712e06867805b41aaf17019ed">tvm::runtime::TypedPackedFunc&lt; R(Args...)&gt;::operator=</a></div><div class="ttdeci">TSelf &amp; operator=(PackedFunc packed)</div><div class="ttdoc">copy assignment operator from PackedFunc. </div><div class="ttdef"><b>Definition:< [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a362662e4d4b67c031ff70d4397052b29"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a362662e4d4b67c031ff70d4397052b29">tvm::runtime::TVMPODValue_::ptr</a></div><div class="ttdeci">T * ptr() const </div><div class="ttdoc">return handle as specific pointer type. </div><div class="ttdef"><b>Definition:</b> packed_func.h:432</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_a63308763010f16e39a53ffbe18b7f6fb"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#a63308763010f16e39a53ffbe18b7f6fb">tvm::runtime::PackedFunc::FType</a></div><div class="ttdeci">std::function&lt; void(TVMArgs args, TVMRetValue *rv)&gt; FType</div><div class="ttdoc">The internal std::function. </div><div class="ttdef"><b>Definition:</b> packed_func.h:95</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ad5446f5812132852387dca7335989e88"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ad5446f5812132852387dca7335989e88">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(Module m)</div><div class="ttdef"><b>Definition:</b> packed_func.h:681</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1Container_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1Container.html">tvm::runtime::NDArray::Container</a></div><div class="ttdoc">Object container class that backs NDArray. </div><div class="ttdef"><b>Definition:</b> ndarray.h:245</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a4ab194932127e4b1c372e5e58e450721"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a4ab194932127e4b1c372e5e58e450721">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(PackedFunc f)</div><div class="ttdef"><b>Definition:</b> packed_func.h:685</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ad5446f5812132852387dca7335989e88"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ad5446f5812132852387dca7335989e88">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(Module m)</div><div class="ttdef"><b>Definition:</b> packed_func.h:683</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_1_1Container_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray_1_1Container.html">tvm::runtime::NDArray::Container</a></div><div class="ttdoc">Object container class that backs NDArray. </div><div class="ttdef"><b>Definition:</b> ndarray.h:243</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a4ab194932127e4b1c372e5e58e450721"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a4ab194932127e4b1c372e5e58e450721">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(PackedFunc f)</div><div class="ttdef"><b>Definition:</b> packed_func.h:687</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgs_html_abdb0f929087b14eb28d75c7a047cef9e"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgs.html#abdb0f929087b14eb28d75c7a047cef9e">tvm::runtime::TVMArgs::type_codes</a></div><div class="ttdeci">const int * type_codes</div><div class="ttdef"><b>Definition:</b> packed_func.h:311</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_ac261cdb80487fb29ac42b28678f8cbef"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#ac261cdb80487fb29ac42b28678f8cbef">tvm::runtime::ObjectRef::data_</a></div><div class="ttdeci">ObjectPtr&lt; Object &gt; data_</div><div class="ttdoc">Internal pointer that backs the reference. </div><div class="ttdef"><b>Definition:</b> object.h:561</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a2c8860df3cbdc52104dca8274abfa219"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a2c8860df3cbdc52104dca8274abfa219">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(DLDataType t)</div><div class="ttdef"><b>Definition:</b> packed_func.h:651</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a2c8860df3cbdc52104dca8274abfa219"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a2c8860df3cbdc52104dca8274abfa219">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(DLDataType t)</div><div class="ttdef"><b>Definition:</b> packed_func.h:653</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> container.h:1178</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc.html">tvm::runtime::TypedPackedFunc</a></div><div class="ttdoc">Please refer to TypedPackedFunc&lt;R(Args..)&gt;. </div><div class="ttdef"><b>Definition:</b> packed_func.h:143</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_aa55c2d96af79e07cdf0ca253ec49dc19"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#aa55c2d96af79e07cdf0ca253ec49dc19">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const TObjectRef &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1164</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_aa55c2d96af79e07cdf0ca253ec49dc19"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#aa55c2d96af79e07cdf0ca253ec49dc19">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const TObjectRef &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1166</div></div>
 <div class="ttc" id="packed__func_8h_html_af3c1f8f580fcc27e84b821304c39aa23"><div class="ttname"><a href="packed__func_8h.html#af3c1f8f580fcc27e84b821304c39aa23">TVM_ALWAYS_INLINE</a></div><div class="ttdeci">#define TVM_ALWAYS_INLINE</div><div class="ttdef"><b>Definition:</b> packed_func.h:54</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_afe1837bdbafe8341c2031c5cebcf6e74"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#afe1837bdbafe8341c2031c5cebcf6e74">tvm::runtime::TVMPODValue_::TVMPODValue_</a></div><div class="ttdeci">TVMPODValue_(TVMValue value, int type_code)</div><div class="ttdef"><b>Definition:</b> packed_func.h:446</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793a9387f774bc8453afe4aa4cd17789a405"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793a9387f774bc8453afe4aa4cd17789a405">kTVMOpaqueHandle</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:108</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a3eaf85a64828b05e95d57e958089a34d"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a3eaf85a64828b05e95d57e958089a34d">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(const TypedPackedFunc&lt; FType &gt; &amp;f)</div><div class="ttdef"><b>Definition:</b> packed_func.h:694</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_aa90598e3e64b32de7ebfc5e09208afa4"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#aa90598e3e64b32de7ebfc5e09208afa4">tvm::runtime::PackedFunc::CallPacked</a></div><div class="ttdeci">void CallPacked(TVMArgs args, TVMRetValue *rv) const </div><div class="ttdoc">Call the function in packed format. </div><div class="ttdef"><b>Definition:</b> packed_func.h:974</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a05a3312e4f335513ba0dbaefcd0c5a2c"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a05a3312e4f335513ba0dbaefcd0c5a2c">tvm::runtime::TVMRetValue::MoveFromCHost</a></div><div class="ttdeci">static TVMRetValue MoveFromCHost(TVMValue value, int type_code)</div><div class="ttdoc">Construct a new TVMRetValue by moving from return value stored via C API. </div><div class="ttdef"><b>Definition:</b> packed_func.h:73 [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a020a38f10ece8241e07c7bbc08b8d6b8"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a020a38f10ece8241e07c7bbc08b8d6b8">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, std::nullptr_t value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1098</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a3eaf85a64828b05e95d57e958089a34d"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a3eaf85a64828b05e95d57e958089a34d">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(const TypedPackedFunc&lt; FType &gt; &amp;f)</div><div class="ttdef"><b>Definition:</b> packed_func.h:696</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_aa90598e3e64b32de7ebfc5e09208afa4"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#aa90598e3e64b32de7ebfc5e09208afa4">tvm::runtime::PackedFunc::CallPacked</a></div><div class="ttdeci">void CallPacked(TVMArgs args, TVMRetValue *rv) const </div><div class="ttdoc">Call the function in packed format. </div><div class="ttdef"><b>Definition:</b> packed_func.h:976</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a05a3312e4f335513ba0dbaefcd0c5a2c"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a05a3312e4f335513ba0dbaefcd0c5a2c">tvm::runtime::TVMRetValue::MoveFromCHost</a></div><div class="ttdeci">static TVMRetValue MoveFromCHost(TVMValue value, int type_code)</div><div class="ttdoc">Construct a new TVMRetValue by moving from return value stored via C API. </div><div class="ttdef"><b>Definition:</b> packed_func.h:73 [...]
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a020a38f10ece8241e07c7bbc08b8d6b8"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a020a38f10ece8241e07c7bbc08b8d6b8">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, std::nullptr_t value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1100</div></div>
 <div class="ttc" id="namespacetvm_html_ab49bad0808ba033343e72ba37b39af2e"><div class="ttname"><a href="namespacetvm.html#ab49bad0808ba033343e72ba37b39af2e">tvm::max</a></div><div class="ttdeci">PrimExpr max(PrimExpr a, PrimExpr b)</div><div class="ttdoc">take maximum of two values </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_a9581610ea91e0e38f57edc82e264a1b0"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#a9581610ea91e0e38f57edc82e264a1b0">tvm::runtime::Object::GetTypeKey</a></div><div class="ttdeci">std::string GetTypeKey() const </div><div class="ttdef"><b>Definition:</b> object.h:176</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1String_html_a2ee7733b1c8092383ffab8c67bf8cb20"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html#a2ee7733b1c8092383ffab8c67bf8cb20">tvm::runtime::String::CanConvertFrom</a></div><div class="ttdeci">static bool CanConvertFrom(const TVMArgValue &amp;val)</div><div class="ttdoc">Check if a TVMArgValue can be converted to String, i.e. it can be std::string or String. </div><div class="ttdef"><b>Definition:</b> packed_func.h:1495</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1String_html_a2ee7733b1c8092383ffab8c67bf8cb20"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html#a2ee7733b1c8092383ffab8c67bf8cb20">tvm::runtime::String::CanConvertFrom</a></div><div class="ttdeci">static bool CanConvertFrom(const TVMArgValue &amp;val)</div><div class="ttdoc">Check if a TVMArgValue can be converted to String, i.e. it can be std::string or String. </div><div class="ttdef"><b>Definition:</b> packed_func.h:1497</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html">tvm::runtime::ObjectRef</a></div><div class="ttdoc">Base class of all object reference. </div><div class="ttdef"><b>Definition:</b> object.h:498</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ModuleNode_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ModuleNode.html">tvm::runtime::ModuleNode</a></div><div class="ttdoc">Base container of module. </div><div class="ttdef"><b>Definition:</b> module.h:111</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_addc4ffa96d47397893755d5e7e016084"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#addc4ffa96d47397893755d5e7e016084">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(const DataType &amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:656</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ad362b734db5406b02e72476b7f8da779"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ad362b734db5406b02e72476b7f8da779">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(int64_t value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:636</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_addc4ffa96d47397893755d5e7e016084"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#addc4ffa96d47397893755d5e7e016084">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(const DataType &amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:658</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ad362b734db5406b02e72476b7f8da779"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ad362b734db5406b02e72476b7f8da779">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(int64_t value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:638</div></div>
 <div class="ttc" id="object_8h_html"><div class="ttname"><a href="object_8h.html">object.h</a></div><div class="ttdoc">A managed object in the TVM runtime. </div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_acc73129969aab00839e4d50124e0de34"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#acc73129969aab00839e4d50124e0de34">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const TVMByteArray &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1134</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_acc73129969aab00839e4d50124e0de34"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#acc73129969aab00839e4d50124e0de34">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const TVMByteArray &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1136</div></div>
 <div class="ttc" id="data__type_8h_html"><div class="ttname"><a href="data__type_8h.html">data_type.h</a></div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a141e032d848c60f8261046304bdc8c4c"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a141e032d848c60f8261046304bdc8c4c">tvm::runtime::NDArray::FFIGetHandle</a></div><div class="ttdeci">static TVMArrayHandle FFIGetHandle(const ObjectRef &amp;nd)</div><div class="ttdoc">Get FFI Array handle from ndarray. </div><div class="ttdef"><b>Definition:</b> ndarray.h:373</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1NDArray_html_a141e032d848c60f8261046304bdc8c4c"><div class="ttname"><a href="classtvm_1_1runtime_1_1NDArray.html#a141e032d848c60f8261046304bdc8c4c">tvm::runtime::NDArray::FFIGetHandle</a></div><div class="ttdeci">static TVMArrayHandle FFIGetHandle(const ObjectRef &amp;nd)</div><div class="ttdoc">Get FFI Array handle from ndarray. </div><div class="ttdef"><b>Definition:</b> ndarray.h:371</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a9363bb701f16ce5bbb381f2a013d25b4"><div class="ttname"><a href="c__runtime__api_8h.html#a9363bb701f16ce5bbb381f2a013d25b4">TVMContext</a></div><div class="ttdeci">DLContext TVMContext</div><div class="ttdoc">The Device information, abstract away common device types. </div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:135</div></div>
 <div class="ttc" id="runtime_2module_8h_html"><div class="ttname"><a href="runtime_2module_8h.html">module.h</a></div><div class="ttdoc">Runtime container of the functions generated by TVM, This is used to support dynamically link...</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_acfc585ee6554c18706fcfe89d3c6df87"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#acfc585ee6554c18706fcfe89d3c6df87">tvm::runtime::PackedFunc::operator==</a></div><div class="ttdeci">bool operator==(std::nullptr_t null) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:130</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Module_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Module.html">tvm::runtime::Module</a></div><div class="ttdoc">Module container of TVM. </div><div class="ttdef"><b>Definition:</b> module.h:48</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_a4e0c70245a080d914946285ff3ebc0d7"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#a4e0c70245a080d914946285ff3ebc0d7">tvm::runtime::PackedFunc::operator()</a></div><div class="ttdeci">TVMRetValue operator()(Args &amp;&amp;...args) const </div><div class="ttdoc">Call packed function by directly passing in unpacked format. </div><div class="ttdef"><b>Definition:</b> packed_func.h:1185</div></div>
-<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runtime_1_1String_01_4_html_a1d57124f26aeed2583d3ae24226d89c7"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runtime_1_1String_01_4.html#a1d57124f26aeed2583d3ae24226d89c7">tvm::runtime::PackedFuncValueConverter&lt;::tvm::runtime::String &gt;::From</a></div><div class="ttdeci">static String From(const TVMArgValue &amp;val)</div><div class="ttdef"><b>Definition:</b> [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_a14fb895cbdceecbe566588b7467dafc3"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#a14fb895cbdceecbe566588b7467dafc3">tvm::runtime::PackedFunc::body</a></div><div class="ttdeci">FType body() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:976</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_a4e0c70245a080d914946285ff3ebc0d7"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#a4e0c70245a080d914946285ff3ebc0d7">tvm::runtime::PackedFunc::operator()</a></div><div class="ttdeci">TVMRetValue operator()(Args &amp;&amp;...args) const </div><div class="ttdoc">Call packed function by directly passing in unpacked format. </div><div class="ttdef"><b>Definition:</b> packed_func.h:1187</div></div>
+<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runtime_1_1String_01_4_html_a1d57124f26aeed2583d3ae24226d89c7"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runtime_1_1String_01_4.html#a1d57124f26aeed2583d3ae24226d89c7">tvm::runtime::PackedFuncValueConverter&lt;::tvm::runtime::String &gt;::From</a></div><div class="ttdeci">static String From(const TVMArgValue &amp;val)</div><div class="ttdef"><b>Definition:</b> [...]
+<div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_a14fb895cbdceecbe566588b7467dafc3"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#a14fb895cbdceecbe566588b7467dafc3">tvm::runtime::PackedFunc::body</a></div><div class="ttdeci">FType body() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:978</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793ada22c3d9c62fa8f40d46e80ce83b96a1"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793ada22c3d9c62fa8f40d46e80ce83b96a1">kTVMNullptr</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:109</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_ad70b44cf7f2719f1e91fe7c865790865"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#ad70b44cf7f2719f1e91fe7c865790865">tvm::runtime::PackedFunc::operator!=</a></div><div class="ttdeci">bool operator!=(std::nullptr_t null) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:132</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a25374dbab9fea2a0c482201d0e37451e"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a25374dbab9fea2a0c482201d0e37451e">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const TVMArgValue &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1102</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a25374dbab9fea2a0c482201d0e37451e"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a25374dbab9fea2a0c482201d0e37451e">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const TVMArgValue &amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1104</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgValue.html">tvm::runtime::TVMArgValue</a></div><div class="ttdoc">A single argument value to PackedFunc. Containing both type_code and TVMValue. </div><div class="ttdef"><b>Definition:</b> packed_func.h:460</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html">tvm::runtime::PackedFunc</a></div><div class="ttdoc">Packed function is a type-erased function. The arguments are passed by packed format. </div><div class="ttdef"><b>Definition:</b> packed_func.h:75</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Optional_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Optional.html">tvm::runtime::Optional</a></div><div class="ttdoc">Optional container that to represent to a Nullable variant of T. </div><div class="ttdef"><b>Definition:</b> container.h:1560</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a4fb30f18d2b15ccad43061720eb7f561"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a4fb30f18d2b15ccad43061720eb7f561">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const char *value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1125</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgValue_html_abb35e686e66c7af341584ba78a956270"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgValue.html#abb35e686e66c7af341584ba78a956270">tvm::runtime::TVMArgValue::value</a></div><div class="ttdeci">const TVMValue &amp; value() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:505</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html">tvm::runtime::TVMArgsSetter</a></div><div class="ttdef"><b>Definition:</b> packed_func.h:1080</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a8efcda8c0b276ae54f31d2f21607f376"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a8efcda8c0b276ae54f31d2f21607f376">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, TObjectRef &amp;&amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1171</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a5882f7eda112e825eb5a87e45aeb85b0"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a5882f7eda112e825eb5a87e45aeb85b0">tvm::runtime::TVMArgsSetter::TVMArgsSetter</a></div><div class="ttdeci">TVMArgsSetter(TVMValue *values, int *type_codes)</div><div class="ttdef"><b>Definition:</b> packed_func.h:1082</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a4fb30f18d2b15ccad43061720eb7f561"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a4fb30f18d2b15ccad43061720eb7f561">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, const char *value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1127</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgValue_html_abb35e686e66c7af341584ba78a956270"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgValue.html#abb35e686e66c7af341584ba78a956270">tvm::runtime::TVMArgValue::value</a></div><div class="ttdeci">const TVMValue &amp; value() const </div><div class="ttdef"><b>Definition:</b> packed_func.h:507</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html">tvm::runtime::TVMArgsSetter</a></div><div class="ttdef"><b>Definition:</b> packed_func.h:1082</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a8efcda8c0b276ae54f31d2f21607f376"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a8efcda8c0b276ae54f31d2f21607f376">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, TObjectRef &amp;&amp;value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1173</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a5882f7eda112e825eb5a87e45aeb85b0"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a5882f7eda112e825eb5a87e45aeb85b0">tvm::runtime::TVMArgsSetter::TVMArgsSetter</a></div><div class="ttdeci">TVMArgsSetter(TVMValue *values, int *type_codes)</div><div class="ttdef"><b>Definition:</b> packed_func.h:1084</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4_html_af45a2ceff92e6f6c394ea766a45027a0"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html#af45a2ceff92e6f6c394ea766a45027a0">tvm::runtime::TypedPackedFunc&lt; R(Args...)&gt;::TypedPackedFunc</a></div><div class="ttdeci">TypedPackedFunc(std::nullptr_t null)</div><div class="ttdoc">constructor from null </div><div class="ttdef"><b>Definition:</b> packed_func.h: [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ab86bf21f214fca72e73a7f6e20ffab8d"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ab86bf21f214fca72e73a7f6e20ffab8d">tvm::runtime::TVMRetValue::TVMRetValue</a></div><div class="ttdeci">TVMRetValue(const TVMRetValue &amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:585</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ab86bf21f214fca72e73a7f6e20ffab8d"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ab86bf21f214fca72e73a7f6e20ffab8d">tvm::runtime::TVMRetValue::TVMRetValue</a></div><div class="ttdeci">TVMRetValue(const TVMRetValue &amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:587</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html_afde7eaa98f6cb12c7e80da16e859c210"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html#afde7eaa98f6cb12c7e80da16e859c210">tvm::runtime::PackedFunc::PackedFunc</a></div><div class="ttdeci">PackedFunc(std::nullptr_t null)</div><div class="ttdoc">constructor from null </div><div class="ttdef"><b>Definition:</b> packed_func.h:99</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a410f230315a467a6c6543d24d8cccce6"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a410f230315a467a6c6543d24d8cccce6">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, double value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1094</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a410f230315a467a6c6543d24d8cccce6"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a410f230315a467a6c6543d24d8cccce6">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, double value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1096</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgValue_html_a5fbd71750e5bbba6edc9094178af9276"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgValue.html#a5fbd71750e5bbba6edc9094178af9276">tvm::runtime::TVMArgValue::TVMArgValue</a></div><div class="ttdeci">TVMArgValue(TVMValue value, int type_code)</div><div class="ttdoc">constructor </div><div class="ttdef"><b>Definition:</b> packed_func.h:469</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ac87c22e4d49e79ce5b36b3aead667fcb"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ac87c22e4d49e79ce5b36b3aead667fcb">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(const TVMRetValue &amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:697</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_ac87c22e4d49e79ce5b36b3aead667fcb"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#ac87c22e4d49e79ce5b36b3aead667fcb">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(const TVMRetValue &amp;other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:699</div></div>
 <div class="ttc" id="packed__func_8h_html_ad1e150bb815d5cb964e15d4fa7b5fd93"><div class="ttname"><a href="packed__func_8h.html#ad1e150bb815d5cb964e15d4fa7b5fd93">TVM_CHECK_TYPE_CODE</a></div><div class="ttdeci">#define TVM_CHECK_TYPE_CODE(CODE, T)</div><div class="ttdef"><b>Definition:</b> packed_func.h:339</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793a4b2f3688d719f6e7c20ab00659e2383e"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793a4b2f3688d719f6e7c20ab00659e2383e">kTVMBytes</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:117</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a7f24de1646e5a56b405e3e95ca71dcd1"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a7f24de1646e5a56b405e3e95ca71dcd1">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, uint64_t value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1089</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a3c657d4b0f50577949521abe324c5325"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a3c657d4b0f50577949521abe324c5325">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, T value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1085</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a400ab1f50cbbbc1ab24975bf32556d16"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a400ab1f50cbbbc1ab24975bf32556d16">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(std::nullptr_t value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:626</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a7f24de1646e5a56b405e3e95ca71dcd1"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a7f24de1646e5a56b405e3e95ca71dcd1">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, uint64_t value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1091</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a3c657d4b0f50577949521abe324c5325"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a3c657d4b0f50577949521abe324c5325">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, T value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1087</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a400ab1f50cbbbc1ab24975bf32556d16"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a400ab1f50cbbbc1ab24975bf32556d16">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(std::nullptr_t value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:628</div></div>
 <div class="ttc" id="c__runtime__api_8h_html"><div class="ttname"><a href="c__runtime__api_8h.html">c_runtime_api.h</a></div></div>
 <div class="ttc" id="namespacetvm_html_a41918af1a1dc386388639a9d3ad06c5d"><div class="ttname"><a href="namespacetvm.html#a41918af1a1dc386388639a9d3ad06c5d">tvm::DataType</a></div><div class="ttdeci">runtime::DataType DataType</div><div class="ttdef"><b>Definition:</b> data_type.h:382</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793af2b95de1a09ed39055edfe8ef5ea484d"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793af2b95de1a09ed39055edfe8ef5ea484d">kTVMStr</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:116</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a69ffbd692883aa9ded039479d8504638"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a69ffbd692883aa9ded039479d8504638">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, DLTensor *value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1110</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_afe5a60e856a30894e795d5230b0a7022"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#afe5a60e856a30894e795d5230b0a7022">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(NDArray other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:670</div></div>
-<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_html"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html">tvm::runtime::PackedFuncValueConverter</a></div><div class="ttdoc">Type trait to specify special value conversion rules from TVMArgValue and TVMRetValue. </div><div class="ttdef"><b>Definition:</b> packed_func.h:860</div></div>
-<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_html_a450c3f2c6181da37f3bc04a9fa422a98"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html#a450c3f2c6181da37f3bc04a9fa422a98">tvm::runtime::PackedFuncValueConverter::From</a></div><div class="ttdeci">static TObjectRef From(const TVMRetValue &amp;val)</div><div class="ttdoc">Convert a TObjectRef from a return value. </div><div class="ttdef"><b>Definition:</b> packed_func.h:872</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMArgsSetter_html_a69ffbd692883aa9ded039479d8504638"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgsSetter.html#a69ffbd692883aa9ded039479d8504638">tvm::runtime::TVMArgsSetter::operator()</a></div><div class="ttdeci">void operator()(size_t i, DLTensor *value) const </div><div class="ttdef"><b>Definition:</b> packed_func.h:1112</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_afe5a60e856a30894e795d5230b0a7022"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#afe5a60e856a30894e795d5230b0a7022">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(NDArray other)</div><div class="ttdef"><b>Definition:</b> packed_func.h:672</div></div>
+<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_html"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html">tvm::runtime::PackedFuncValueConverter</a></div><div class="ttdoc">Type trait to specify special value conversion rules from TVMArgValue and TVMRetValue. </div><div class="ttdef"><b>Definition:</b> packed_func.h:862</div></div>
+<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_html_a450c3f2c6181da37f3bc04a9fa422a98"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html#a450c3f2c6181da37f3bc04a9fa422a98">tvm::runtime::PackedFuncValueConverter::From</a></div><div class="ttdeci">static TObjectRef From(const TVMRetValue &amp;val)</div><div class="ttdoc">Convert a TObjectRef from a return value. </div><div class="ttdef"><b>Definition:</b> packed_func.h:874</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793a8bd637c4caa55b04490fe26b250e3d86"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793a8bd637c4caa55b04490fe26b250e3d86">kTVMDLTensorHandle</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:112</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4_html_a4abadc6786dd14a3aed6e2b5b342d1d6"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html#a4abadc6786dd14a3aed6e2b5b342d1d6">tvm::runtime::TypedPackedFunc&lt; R(Args...)&gt;::TypedPackedFunc</a></div><div class="ttdeci">TypedPackedFunc()</div><div class="ttdoc">default constructor </div><div class="ttdef"><b>Definition:</b> packed_func.h:183</div></div>
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a7c16ff5c118b1577a9188ca3f3509167"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a7c16ff5c118b1577a9188ca3f3509167">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(void *value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:631</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html_a7c16ff5c118b1577a9188ca3f3509167"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html#a7c16ff5c118b1577a9188ca3f3509167">tvm::runtime::TVMRetValue::operator=</a></div><div class="ttdeci">TVMRetValue &amp; operator=(void *value)</div><div class="ttdef"><b>Definition:</b> packed_func.h:633</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a2f46b59a6c1d5eb4575d7f583b5f1a0c"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a2f46b59a6c1d5eb4575d7f583b5f1a0c">tvm::runtime::TVMPODValue_::TVMPODValue_</a></div><div class="ttdeci">TVMPODValue_()</div><div class="ttdef"><b>Definition:</b> packed_func.h:445</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
diff --git a/docs/api/doxygen/reflection_8h_source.html b/docs/api/doxygen/reflection_8h_source.html
index 5a38732..5800791 100644
--- a/docs/api/doxygen/reflection_8h_source.html
+++ b/docs/api/doxygen/reflection_8h_source.html
@@ -89,7 +89,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">reflection.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="reflection_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more  [...]
+<a href="reflection_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more  [...]
 <div class="ttc" id="structtvm_1_1detail_1_1SelectVisitAttrs_html"><div class="ttname"><a href="structtvm_1_1detail_1_1SelectVisitAttrs.html">tvm::detail::SelectVisitAttrs</a></div><div class="ttdef"><b>Definition:</b> reflection.h:327</div></div>
 <div class="ttc" id="classtvm_1_1ReflectionVTable_1_1Registry_html_ac8f4637640aa9dffed745303a4cfa827"><div class="ttname"><a href="classtvm_1_1ReflectionVTable_1_1Registry.html#ac8f4637640aa9dffed745303a4cfa827">tvm::ReflectionVTable::Registry::Registry</a></div><div class="ttdeci">Registry(ReflectionVTable *parent, uint32_t type_index)</div><div class="ttdef"><b>Definition:</b> reflection.h:203</div></div>
 <div class="ttc" id="classtvm_1_1ReflectionVTable_html"><div class="ttname"><a href="classtvm_1_1ReflectionVTable.html">tvm::ReflectionVTable</a></div><div class="ttdoc">Virtual function table to support IR/AST node reflection. </div><div class="ttdef"><b>Definition:</b> reflection.h:81</div></div>
diff --git a/docs/api/doxygen/relay_2type_8h_source.html b/docs/api/doxygen/relay_2type_8h_source.html
index 07ad87c..b43cae0 100644
--- a/docs/api/doxygen/relay_2type_8h_source.html
+++ b/docs/api/doxygen/relay_2type_8h_source.html
@@ -114,7 +114,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="namespacetvm_1_1relay_html_a13fd39dbc2a639262858e9b72e8fb37f"><div class="ttname"><a href="namespacetvm_1_1relay.html#a13fd39dbc2a639262858e9b72e8fb37f">tvm::relay::RelayRefType</a></div><div class="ttdeci">tvm::RelayRefType RelayRefType</div><div class="ttdef"><b>Definition:</b> type.h:61</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_abe473e7f103d7aa63b7b09fee09df932"><div class="ttname"><a href="namespacetvm_1_1relay.html#abe473e7f103d7aa63b7b09fee09df932">tvm::relay::Any</a></div><div class="ttdeci">tvm::tir::Any Any</div><div class="ttdef"><b>Definition:</b> type.h:45</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a517ebee34b82bdb80f74ffcdcf361940"><div class="ttname"><a href="namespacetvm_1_1relay.html#a517ebee34b82bdb80f74ffcdcf361940">tvm::relay::IncompleteTypeNode</a></div><div class="ttdeci">tvm::IncompleteTypeNode IncompleteTypeNode</div><div class="ttdef"><b>Definition:</b> type.h:60</div></div>
-<div class="ttc" id="classtvm_1_1tir_1_1Any_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Any.html">tvm::tir::Any</a></div><div class="ttdoc">Managed reference to AnyNode. </div><div class="ttdef"><b>Definition:</b> expr.h:1105</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1Any_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Any.html">tvm::tir::Any</a></div><div class="ttdoc">Managed reference to AnyNode. </div><div class="ttdef"><b>Definition:</b> expr.h:1107</div></div>
 <div class="ttc" id="classtvm_1_1TupleType_html"><div class="ttname"><a href="classtvm_1_1TupleType.html">tvm::TupleType</a></div><div class="ttdoc">Managed reference to TupleTypeNode. </div><div class="ttdef"><b>Definition:</b> type.h:339</div></div>
 <div class="ttc" id="ir_2type_8h_html"><div class="ttname"><a href="ir_2type_8h.html">type.h</a></div><div class="ttdoc">IR/AST nodes for the unified type system in TVM. </div></div>
 <div class="ttc" id="tir_2expr_8h_html"><div class="ttname"><a href="tir_2expr_8h.html">expr.h</a></div><div class="ttdoc">TIR expressions. </div></div>
diff --git a/docs/api/doxygen/rocblas_8h.html b/docs/api/doxygen/rocblas_8h.html
index 76e9cad..cfbe0d7 100644
--- a/docs/api/doxygen/rocblas_8h.html
+++ b/docs/api/doxygen/rocblas_8h.html
@@ -123,6 +123,9 @@ Functions</h2></td></tr>
 <tr class="memitem:abefad1f2ad083fc038566a9ef6278dff"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm_1_1topi_1_1contrib.html#abefad1f2ad083fc038566a9ef6278dff">tvm::topi::contrib::rocblas_matmul</a> (const <a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> &amp;lhs, const <a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor< [...]
 <tr class="memdesc:abefad1f2ad083fc038566a9ef6278dff"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create an op that multiplies lhs and rhs with rocBLAS.  <a href="namespacetvm_1_1topi_1_1contrib.html#abefad1f2ad083fc038566a9ef6278dff">More...</a><br /></td></tr>
 <tr class="separator:abefad1f2ad083fc038566a9ef6278dff"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:abf1113dd429e1285752b48f62fe12848"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm_1_1topi_1_1contrib.html#abf1113dd429e1285752b48f62fe12848">tvm::topi::contrib::rocblas_batch_matmul</a> (const <a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> &amp;lhs, const <a class="el" href="classtvm_1_1te_1_1Tensor.html">T [...]
+<tr class="memdesc:abf1113dd429e1285752b48f62fe12848"><td class="mdescLeft">&#160;</td><td class="mdescRight">Create an op that batch multiplies lhs and rhs with rocBLAS.  <a href="namespacetvm_1_1topi_1_1contrib.html#abf1113dd429e1285752b48f62fe12848">More...</a><br /></td></tr>
+<tr class="separator:abf1113dd429e1285752b48f62fe12848"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table>
 </div><!-- contents -->
 <!-- start footer part -->
diff --git a/docs/api/doxygen/rocblas_8h_source.html b/docs/api/doxygen/rocblas_8h_source.html
index 80fa594..eeb885a 100644
--- a/docs/api/doxygen/rocblas_8h_source.html
+++ b/docs/api/doxygen/rocblas_8h_source.html
@@ -89,8 +89,9 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">rocblas.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="rocblas_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more con [...]
+<a href="rocblas_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more con [...]
 <div class="ttc" id="namespacetvm_1_1te_html"><div class="ttname"><a href="namespacetvm_1_1te.html">tvm::te</a></div><div class="ttdoc">Tensor expression language DSL. </div><div class="ttdef"><b>Definition:</b> autodiff.h:35</div></div>
+<div class="ttc" id="namespacetvm_1_1topi_1_1contrib_html_abf1113dd429e1285752b48f62fe12848"><div class="ttname"><a href="namespacetvm_1_1topi_1_1contrib.html#abf1113dd429e1285752b48f62fe12848">tvm::topi::contrib::rocblas_batch_matmul</a></div><div class="ttdeci">Tensor rocblas_batch_matmul(const Tensor &amp;lhs, const Tensor &amp;rhs, bool transa, bool transb)</div><div class="ttdoc">Create an op that batch multiplies lhs and rhs with rocBLAS. </div><div class="ttdef"><b>Definition:</b> [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contigious sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> container.h:560</div></div>
 <div class="ttc" id="detail_2extern_8h_html"><div class="ttname"><a href="detail_2extern_8h.html">extern.h</a></div><div class="ttdoc">Helpers for using external functions. </div></div>
 <div class="ttc" id="classtvm_1_1te_1_1Tensor_html"><div class="ttname"><a href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a></div><div class="ttdoc">Tensor structure representing a possible input, or intermediate computation result. </div><div class="ttdef"><b>Definition:</b> tensor.h:103</div></div>
diff --git a/docs/api/doxygen/runtime_2module_8h_source.html b/docs/api/doxygen/runtime_2module_8h_source.html
index 829056f..7865cc5 100644
--- a/docs/api/doxygen/runtime_2module_8h_source.html
+++ b/docs/api/doxygen/runtime_2module_8h_source.html
@@ -90,7 +90,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 </div><!--header-->
 <div class="contents">
 <a href="runtime_2module_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or  [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1Module_html_a1233f7b896bb299ef07f9e41a4ffdc17"><div class="ttname"><a href="classtvm_1_1runtime_1_1Module.html#a1233f7b896bb299ef07f9e41a4ffdc17">tvm::runtime::Module::GetFunction</a></div><div class="ttdeci">PackedFunc GetFunction(const std::string &amp;name, bool query_imports=false)</div><div class="ttdoc">Get packed function from current module by name. </div><div class="ttdef"><b>Definition:</b> packed_func.h:1459</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1Module_html_a1233f7b896bb299ef07f9e41a4ffdc17"><div class="ttname"><a href="classtvm_1_1runtime_1_1Module.html#a1233f7b896bb299ef07f9e41a4ffdc17">tvm::runtime::Module::GetFunction</a></div><div class="ttdeci">PackedFunc GetFunction(const std::string &amp;name, bool query_imports=false)</div><div class="ttdoc">Get packed function from current module by name. </div><div class="ttdef"><b>Definition:</b> packed_func.h:1461</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Module_html_afb5a6c9b0e0a541245958456ebe39260"><div class="ttname"><a href="classtvm_1_1runtime_1_1Module.html#afb5a6c9b0e0a541245958456ebe39260">tvm::runtime::Module::Import</a></div><div class="ttdeci">void Import(Module other)</div><div class="ttdoc">Import another module into this module. </div><div class="ttdef"><b>Definition:</b> module.h:233</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectPtr_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectPtr.html">tvm::runtime::ObjectPtr</a></div><div class="ttdoc">A custom smart pointer for Object. </div><div class="ttdef"><b>Definition:</b> object.h:350</div></div>
 <div class="ttc" id="structtvm_1_1runtime_1_1TypeIndex_html_aed93c7318efc8052201d4c404b21a40da8ce1dbb49ce074502049c6909061a2ae"><div class="ttname"><a href="structtvm_1_1runtime_1_1TypeIndex.html#aed93c7318efc8052201d4c404b21a40da8ce1dbb49ce074502049c6909061a2ae">tvm::runtime::TypeIndex::kRuntimeModule</a></div><div class="ttdoc">runtime::Module. </div><div class="ttdef"><b>Definition:</b> object.h:62</div></div>
diff --git a/docs/api/doxygen/search/all_12.js b/docs/api/doxygen/search/all_12.js
index 6be6702..3413764 100644
--- a/docs/api/doxygen/search/all_12.js
+++ b/docs/api/doxygen/search/all_12.js
@@ -159,8 +159,8 @@ var searchData=
   ['reverse_5fiterator',['reverse_iterator',['../classtvm_1_1runtime_1_1Array.html#a4886f1509998e380f032896a5afb27b9',1,'tvm::runtime::Array']]],
   ['reverse_5fsequence',['reverse_sequence',['../namespacetvm_1_1topi.html#ab8ad5eed3079de21c92a7639ed370096',1,'tvm::topi']]],
   ['reverseattrs',['ReverseAttrs',['../structtvm_1_1relay_1_1ReverseAttrs.html',1,'tvm::relay']]],
-  ['reverseiteradapter',['ReverseIterAdapter',['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a579235eb3691b76d29b4ae5f178318ef',1,'tvm::runtime::ReverseIterAdapter']]],
   ['reverseiteradapter',['ReverseIterAdapter',['../classtvm_1_1runtime_1_1ReverseIterAdapter.html',1,'tvm::runtime']]],
+  ['reverseiteradapter',['ReverseIterAdapter',['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a579235eb3691b76d29b4ae5f178318ef',1,'tvm::runtime::ReverseIterAdapter']]],
   ['reversesequenceattrs',['ReverseSequenceAttrs',['../structtvm_1_1relay_1_1ReverseSequenceAttrs.html',1,'tvm::relay']]],
   ['rewrite',['Rewrite',['../classtvm_1_1relay_1_1MixedModeMutator.html#a4c93a9094db80cace013ef02e6bcd724',1,'tvm::relay::MixedModeMutator::Rewrite()'],['../classtvm_1_1relay_1_1ExprRewriter.html#a28cebb8decbe035ff95683c45f69e53b',1,'tvm::relay::ExprRewriter::Rewrite()']]],
   ['rewrite_5f',['Rewrite_',['../classtvm_1_1relay_1_1MixedModeMutator.html#a3b53908f4b8cc3708ca75892e47f0929',1,'tvm::relay::MixedModeMutator::Rewrite_(const TupleNode *pre, const Expr &amp;post)'],['../classtvm_1_1relay_1_1MixedModeMutator.html#aedab19fa2803a80d4148f83c1c4b0814',1,'tvm::relay::MixedModeMutator::Rewrite_(const CallNode *pre, const Expr &amp;post)'],['../classtvm_1_1relay_1_1MixedModeMutator.html#a2424d6590fceb087cb1624ab8d3348a1',1,'tvm::relay::MixedModeMutator::Rewrite [...]
@@ -178,6 +178,7 @@ var searchData=
   ['right',['right',['../classtvm_1_1relay_1_1AltPatternNode.html#a01262756211205f8764d07f2372c5d36',1,'tvm::relay::AltPatternNode']]],
   ['right_5fshift',['right_shift',['../namespacetvm_1_1topi.html#a9673b9caffb46404b566c3f04a492dfe',1,'tvm::topi::right_shift(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)'],['../namespacetvm_1_1topi.html#aec8705eed0238733dc89e2a34465e9d0',1,'tvm::topi::right_shift(const tvm::te::Tensor &amp;A, const tvm::te::Tensor &amp;B, std::string name=&quot;T_&quot;&quot;right_shift&quot;, std::string tag=kBroadcast)'],['../namespacetvm_1_1topi.html#af4d241b8705ec1d62785900224f77aab',1,'t [...]
   ['rocblas_2eh',['rocblas.h',['../rocblas_8h.html',1,'']]],
+  ['rocblas_5fbatch_5fmatmul',['rocblas_batch_matmul',['../namespacetvm_1_1topi_1_1contrib.html#abf1113dd429e1285752b48f62fe12848',1,'tvm::topi::contrib']]],
   ['rocblas_5fmatmul',['rocblas_matmul',['../namespacetvm_1_1topi_1_1contrib.html#abefad1f2ad083fc038566a9ef6278dff',1,'tvm::topi::contrib']]],
   ['roialignattrs',['ROIAlignAttrs',['../structtvm_1_1relay_1_1ROIAlignAttrs.html',1,'tvm::relay']]],
   ['roipoolattrs',['ROIPoolAttrs',['../structtvm_1_1relay_1_1ROIPoolAttrs.html',1,'tvm::relay']]],
diff --git a/docs/api/doxygen/search/functions_12.js b/docs/api/doxygen/search/functions_12.js
index 53a79d9..093b71d 100644
--- a/docs/api/doxygen/search/functions_12.js
+++ b/docs/api/doxygen/search/functions_12.js
@@ -68,6 +68,7 @@ var searchData=
   ['rfactor',['rfactor',['../classtvm_1_1auto__scheduler_1_1State.html#a21c27b06d439267f8b981fa05c5f48a0',1,'tvm::auto_scheduler::State::rfactor()'],['../classtvm_1_1te_1_1Schedule.html#a34ae85add41bbed0140726d024d08862',1,'tvm::te::Schedule::rfactor()']]],
   ['rfactorstep',['RfactorStep',['../classtvm_1_1auto__scheduler_1_1RfactorStep.html#a26e6f85b55307f18fab4469e3bd4be0c',1,'tvm::auto_scheduler::RfactorStep::RfactorStep(int stage_id, int iter_id, int factor_iter_id)'],['../classtvm_1_1auto__scheduler_1_1RfactorStep.html#a95575c21441177634178245ab562cb4f',1,'tvm::auto_scheduler::RfactorStep::RfactorStep(dmlc::JSONReader *reader)']]],
   ['right_5fshift',['right_shift',['../namespacetvm_1_1topi.html#a9673b9caffb46404b566c3f04a492dfe',1,'tvm::topi::right_shift(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)'],['../namespacetvm_1_1topi.html#aec8705eed0238733dc89e2a34465e9d0',1,'tvm::topi::right_shift(const tvm::te::Tensor &amp;A, const tvm::te::Tensor &amp;B, std::string name=&quot;T_&quot;&quot;right_shift&quot;, std::string tag=kBroadcast)'],['../namespacetvm_1_1topi.html#af4d241b8705ec1d62785900224f77aab',1,'t [...]
+  ['rocblas_5fbatch_5fmatmul',['rocblas_batch_matmul',['../namespacetvm_1_1topi_1_1contrib.html#abf1113dd429e1285752b48f62fe12848',1,'tvm::topi::contrib']]],
   ['rocblas_5fmatmul',['rocblas_matmul',['../namespacetvm_1_1topi_1_1contrib.html#abefad1f2ad083fc038566a9ef6278dff',1,'tvm::topi::contrib']]],
   ['root_5fiter_5fvars',['root_iter_vars',['../classtvm_1_1te_1_1OperationNode.html#a8d15cfe7d0d721da305c1b36e9f5a914',1,'tvm::te::OperationNode::root_iter_vars()'],['../classtvm_1_1te_1_1PlaceholderOpNode.html#aed3620e14c76716f976ffec15a68f074',1,'tvm::te::PlaceholderOpNode::root_iter_vars()'],['../classtvm_1_1te_1_1BaseComputeOpNode.html#aab7b5b43122ee14bb00640906267361a',1,'tvm::te::BaseComputeOpNode::root_iter_vars()'],['../classtvm_1_1te_1_1ScanOpNode.html#a7a2670bdbf28281b2a8d977e4 [...]
   ['round',['round',['../namespacetvm.html#a660170263d6864b1caa60728619971be',1,'tvm::round()'],['../namespacetvm_1_1topi.html#ac8101cdce02816930697ab74213ff059',1,'tvm::topi::round()']]],
diff --git a/docs/api/doxygen/target__kind_8h_source.html b/docs/api/doxygen/target__kind_8h_source.html
index 318d84f..096a0b5 100644
--- a/docs/api/doxygen/target__kind_8h_source.html
+++ b/docs/api/doxygen/target__kind_8h_source.html
@@ -90,7 +90,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 </div><!--header-->
 <div class="contents">
 <a href="target__kind_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or mor [...]
-<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:557</div></div>
+<div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:559</div></div>
 <div class="ttc" id="structtvm_1_1detail_1_1is__specialized_html"><div class="ttname"><a href="structtvm_1_1detail_1_1is__specialized.html">tvm::detail::is_specialized</a></div><div class="ttdef"><b>Definition:</b> target_kind.h:228</div></div>
 <div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a2995c32e12246e892f7f4cb621a2819c"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a2995c32e12246e892f7f4cb621a2819c">tvm::TargetKindRegEntry::set_default_keys</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_default_keys(std::vector&lt; String &gt; keys)</div><div class="ttdoc">Set DLPack&amp;#39;s device_type the target. </div><div class="ttdef"><b>Definition:</b> target_kind.h:310</div></div>
 <div class="ttc" id="node_8h_html"><div class="ttname"><a href="node_8h.html">node.h</a></div><div class="ttdoc">Definitions and helper macros for IR/AST nodes. </div></div>
diff --git a/docs/api/doxygen/tir_2expr_8h_source.html b/docs/api/doxygen/tir_2expr_8h_source.html
index 1a73e7f..ed01482 100644
--- a/docs/api/doxygen/tir_2expr_8h_source.html
+++ b/docs/api/doxygen/tir_2expr_8h_source.html
@@ -89,7 +89,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="title">expr.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="tir_2expr_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more c [...]
+<a href="tir_2expr_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more c [...]
 <div class="ttc" id="classtvm_1_1tir_1_1BroadcastNode_html_ac2a82fc08ca7841339b4bf7330a36580"><div class="ttname"><a href="classtvm_1_1tir_1_1BroadcastNode.html#ac2a82fc08ca7841339b4bf7330a36580">tvm::tir::BroadcastNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const BroadcastNode *other, SEqualReducer equal) const </div><div class="ttdef"><b>Definition:</b> expr.h:804</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ReduceNode_html_ab8d18e8f464db03aa61dd3856952af34"><div class="ttname"><a href="classtvm_1_1tir_1_1ReduceNode.html#ab8d18e8f464db03aa61dd3856952af34">tvm::tir::ReduceNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const ReduceNode *other, SEqualReducer equal) const </div><div class="ttdef"><b>Definition:</b> expr.h:1051</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1LetNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1LetNode.html">tvm::tir::LetNode</a></div><div class="ttdoc">Let binding. Bind var to value then evaluate body. </div><div class="ttdef"><b>Definition:</b> expr.h:831</div></div>
@@ -104,7 +104,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1tir_1_1LetNode_html_a21fdb9beb794933ee1094774ddfd2ee2"><div class="ttname"><a href="classtvm_1_1tir_1_1LetNode.html#a21fdb9beb794933ee1094774ddfd2ee2">tvm::tir::LetNode::var</a></div><div class="ttdeci">Var var</div><div class="ttdoc">The variable. </div><div class="ttdef"><b>Definition:</b> expr.h:834</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BinaryOpNode_html_a2b8c5fdbb2edc200484b7195ad68acf6"><div class="ttname"><a href="classtvm_1_1tir_1_1BinaryOpNode.html#a2b8c5fdbb2edc200484b7195ad68acf6">tvm::tir::BinaryOpNode::b</a></div><div class="ttdeci">PrimExpr b</div><div class="ttdoc">The right operand. </div><div class="ttdef"><b>Definition:</b> expr.h:128</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1LoadNode_html_af27e6b2dc6293dffaa93e992085be458"><div class="ttname"><a href="classtvm_1_1tir_1_1LoadNode.html#af27e6b2dc6293dffaa93e992085be458">tvm::tir::LoadNode::predicate</a></div><div class="ttdeci">PrimExpr predicate</div><div class="ttdoc">The predicate to mask which lanes would be loaded. </div><div class="ttdef"><b>Definition:</b> expr.h:704</div></div>
-<div class="ttc" id="classtvm_1_1tir_1_1AnyNode_html_aa4619aecf23ceefa065436ad0c001cba"><div class="ttname"><a href="classtvm_1_1tir_1_1AnyNode.html#aa4619aecf23ceefa065436ad0c001cba">tvm::tir::AnyNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const </div><div class="ttdef"><b>Definition:</b> expr.h:1092</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1AnyNode_html_aa4619aecf23ceefa065436ad0c001cba"><div class="ttname"><a href="classtvm_1_1tir_1_1AnyNode.html#aa4619aecf23ceefa065436ad0c001cba">tvm::tir::AnyNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const </div><div class="ttdef"><b>Definition:</b> expr.h:1094</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BinaryOpNode_html_a743b7846b1376cb2d8e4b821d9cb9eaa"><div class="ttname"><a href="classtvm_1_1tir_1_1BinaryOpNode.html#a743b7846b1376cb2d8e4b821d9cb9eaa">tvm::tir::BinaryOpNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const T *other, SEqualReducer equal) const </div><div class="ttdef"><b>Definition:</b> expr.h:136</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1LoadNode_html_acc4597323aba2761dafaf9a24bb4c0e4"><div class="ttname"><a href="classtvm_1_1tir_1_1LoadNode.html#acc4597323aba2761dafaf9a24bb4c0e4">tvm::tir::LoadNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:706</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1RampNode_html_a57d0d4129dc260fc2a79e50ad2dc3e68"><div class="ttname"><a href="classtvm_1_1tir_1_1RampNode.html#a57d0d4129dc260fc2a79e50ad2dc3e68">tvm::tir::RampNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:757</div></div>
@@ -157,7 +157,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1tir_1_1ShuffleNode_html_af764cbab54a79f4f7f3d5e1ddd9e1e5f"><div class="ttname"><a href="classtvm_1_1tir_1_1ShuffleNode.html#af764cbab54a79f4f7f3d5e1ddd9e1e5f">tvm::tir::ShuffleNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:930</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1AnyNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1AnyNode.html">tvm::tir::AnyNode</a></div><div class="ttdoc">Any shape. </div><div class="ttdef"><b>Definition:</b> expr.h:1086</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ShuffleNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1ShuffleNode.html">tvm::tir::ShuffleNode</a></div><div class="ttdoc">Shuffle instruction. vec = concat(vectors) result = (vec[indices[0]], vec[indices[1]] ...</div><div class="ttdef"><b>Definition:</b> expr.h:923</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_abb777c4f4d61883c0cbdf9116458b70d"><div class="ttname"><a href="namespacetvm_1_1tir.html#abb777c4f4d61883c0cbdf9116458b70d">tvm::tir::as_unordered_map</a></div><div class="ttdeci">std::unordered_map&lt; K, V &gt; as_unordered_map(const Map&lt; K, V &gt; &amp;dmap)</div><div class="ttdef"><b>Definition:</b> expr.h:1121</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_abb777c4f4d61883c0cbdf9116458b70d"><div class="ttname"><a href="namespacetvm_1_1tir.html#abb777c4f4d61883c0cbdf9116458b70d">tvm::tir::as_unordered_map</a></div><div class="ttdeci">std::unordered_map&lt; K, V &gt; as_unordered_map(const Map&lt; K, V &gt; &amp;dmap)</div><div class="ttdef"><b>Definition:</b> expr.h:1123</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1CallNode_html_a4e631c0b21801e30d92d23a1c3de23bc"><div class="ttname"><a href="classtvm_1_1tir_1_1CallNode.html#a4e631c0b21801e30d92d23a1c3de23bc">tvm::tir::CallNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const CallNode *other, SEqualReducer equal) const </div><div class="ttdef"><b>Definition:</b> expr.h:894</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1FloorMod_html"><div class="ttname"><a href="classtvm_1_1tir_1_1FloorMod.html">tvm::tir::FloorMod</a></div><div class="ttdoc">Managed reference to FloorModNode. </div><div class="ttdef"><b>Definition:</b> expr.h:261</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1AddNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1AddNode.html">tvm::tir::AddNode</a></div><div class="ttdoc">a + b </div><div class="ttdef"><b>Definition:</b> expr.h:150</div></div>
@@ -176,7 +176,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1tir_1_1ShuffleNode_html_aa444b61cf671ef31060535c61746ab6c"><div class="ttname"><a href="classtvm_1_1tir_1_1ShuffleNode.html#aa444b61cf671ef31060535c61746ab6c">tvm::tir::ShuffleNode::vectors</a></div><div class="ttdeci">Array&lt; PrimExpr &gt; vectors</div><div class="ttdoc">the input vectors. </div><div class="ttdef"><b>Definition:</b> expr.h:926</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1CallNode_html_ab16f6a3e97ab7e8ec0d2e906fd5a2f71"><div class="ttname"><a href="classtvm_1_1tir_1_1CallNode.html#ab16f6a3e97ab7e8ec0d2e906fd5a2f71">tvm::tir::CallNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:888</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BinaryOpNode_html_a2fd1d01ff422a1765b27f1da1b7af4ce"><div class="ttname"><a href="classtvm_1_1tir_1_1BinaryOpNode.html#a2fd1d01ff422a1765b27f1da1b7af4ce">tvm::tir::BinaryOpNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const </div><div class="ttdef"><b>Definition:</b> expr.h:140</div></div>
-<div class="ttc" id="classtvm_1_1tir_1_1Any_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Any.html">tvm::tir::Any</a></div><div class="ttdoc">Managed reference to AnyNode. </div><div class="ttdef"><b>Definition:</b> expr.h:1105</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1Any_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Any.html">tvm::tir::Any</a></div><div class="ttdoc">Managed reference to AnyNode. </div><div class="ttdef"><b>Definition:</b> expr.h:1107</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Div_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Div.html">tvm::tir::Div</a></div><div class="ttdoc">Managed reference to DivNode. </div><div class="ttdef"><b>Definition:</b> expr.h:210</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1NE_html"><div class="ttname"><a href="classtvm_1_1tir_1_1NE.html">tvm::tir::NE</a></div><div class="ttdoc">Managed reference to NENode. </div><div class="ttdef"><b>Definition:</b> expr.h:356</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1CommReducerNode_html_a7a679454a48aeb1def1ab92c9f2c923d"><div class="ttname"><a href="classtvm_1_1tir_1_1CommReducerNode.html#a7a679454a48aeb1def1ab92c9f2c923d">tvm::tir::CommReducerNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const CommReducerNode *other, SEqualReducer equal) const </div><div class="ttdef"><b>Definition:</b> expr.h:992</div></div>
@@ -187,7 +187,7 @@ var searchBox = new SearchBox("searchBox", "search",false,'Search');
 <div class="ttc" id="classtvm_1_1tir_1_1Mod_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Mod.html">tvm::tir::Mod</a></div><div class="ttdoc">Managed reference to ModNode. </div><div class="ttdef"><b>Definition:</b> expr.h:229</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1StringImmNode_html_a1838338eb7520d37a2a779358521287b"><div class="ttname"><a href="classtvm_1_1tir_1_1StringImmNode.html#a1838338eb7520d37a2a779358521287b">tvm::tir::StringImmNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const StringImmNode *other, SEqualReducer equal) const </div><div class="ttdef"><b>Definition:</b> expr.h:61</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1RampNode_html_aed36222cfc75644cdb8b7619cc9b69eb"><div class="ttname"><a href="classtvm_1_1tir_1_1RampNode.html#aed36222cfc75644cdb8b7619cc9b69eb">tvm::tir::RampNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const RampNode *other, SEqualReducer equal) const </div><div class="ttdef"><b>Definition:</b> expr.h:764</div></div>
-<div class="ttc" id="classtvm_1_1tir_1_1AnyNode_html_ac4acdd164527dde5e3f6afe37ee4f707"><div class="ttname"><a href="classtvm_1_1tir_1_1AnyNode.html#ac4acdd164527dde5e3f6afe37ee4f707">tvm::tir::AnyNode::ToVar</a></div><div class="ttdeci">Var ToVar() const </div><div class="ttdoc">Convert to var. </div><div class="ttdef"><b>Definition:</b> expr.h:1095</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1AnyNode_html_ac4acdd164527dde5e3f6afe37ee4f707"><div class="ttname"><a href="classtvm_1_1tir_1_1AnyNode.html#ac4acdd164527dde5e3f6afe37ee4f707">tvm::tir::AnyNode::ToVar</a></div><div class="ttdeci">Var ToVar() const </div><div class="ttdoc">Convert to var. </div><div class="ttdef"><b>Definition:</b> expr.h:1097</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1CmpOpNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1CmpOpNode.html">tvm::tir::CmpOpNode</a></div><div class="ttdoc">Base template to implement comparison ops. </div><div class="ttdef"><b>Definition:</b> expr.h:304</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ReduceNode_html_a5dcdd28aac37f4313ad18065bf824e11"><div class="ttname"><a href="classtvm_1_1tir_1_1ReduceNode.html#a5dcdd28aac37f4313ad18065bf824e11">tvm::tir::ReduceNode::combiner</a></div><div class="ttdeci">CommReducer combiner</div><div class="ttdoc">The commutative combiner. </div><div class="ttdef"><b>Definition:</b> expr.h:1026</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ReduceNode_html_ab1b5e256aabba3307ccfeb291bb58771"><div class="ttname"><a href="classtvm_1_1tir_1_1ReduceNode.html#ab1b5e256aabba3307ccfeb291bb58771">tvm::tir::ReduceNode::source</a></div><div class="ttdeci">Array&lt; PrimExpr &gt; source</div><div class="ttdoc">The source operand. </div><div class="ttdef"><b>Definition:</b> expr.h:1028</div></div>
diff --git a/docs/api/javadoc/org/apache/tvm/class-use/Function.html b/docs/api/javadoc/org/apache/tvm/class-use/Function.html
index ac4f579..6a49602 100644
--- a/docs/api/javadoc/org/apache/tvm/class-use/Function.html
+++ b/docs/api/javadoc/org/apache/tvm/class-use/Function.html
@@ -134,26 +134,26 @@
 </tr>
 <tr class="altColor">
 <td class="colFirst"><code>static <a href="../../../../org/apache/tvm/Function.html" title="class in org.apache.tvm">Function</a></code></td>
-<td class="colLast"><span class="typeNameLabel">API.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/API.html#get-java.lang.String-">get</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;name)</code>
+<td class="colLast"><span class="typeNameLabel">APIInternal.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/APIInternal.html#get-java.lang.String-">get</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;name)</code>
 <div class="block">Get a TVM API function by name.</div>
 </td>
 </tr>
 <tr class="rowColor">
 <td class="colFirst"><code>static <a href="../../../../org/apache/tvm/Function.html" title="class in org.apache.tvm">Function</a></code></td>
-<td class="colLast"><span class="typeNameLabel">APIInternal.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/APIInternal.html#get-java.lang.String-">get</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;name)</code>
+<td class="colLast"><span class="typeNameLabel">API.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/API.html#get-java.lang.String-">get</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;name)</code>
 <div class="block">Get a TVM API function by name.</div>
 </td>
 </tr>
 <tr class="altColor">
+<td class="colFirst"><code><a href="../../../../org/apache/tvm/Function.html" title="class in org.apache.tvm">Function</a></code></td>
+<td class="colLast"><span class="typeNameLabel">Module.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/Module.html#getFunction-java.lang.String-">getFunction</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;name)</code>&nbsp;</td>
+</tr>
+<tr class="rowColor">
 <td class="colFirst"><code>static <a href="../../../../org/apache/tvm/Function.html" title="class in org.apache.tvm">Function</a></code></td>
 <td class="colLast"><span class="typeNameLabel">Function.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/Function.html#getFunction-java.lang.String-">getFunction</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;name)</code>
 <div class="block">Get registered function.</div>
 </td>
 </tr>
-<tr class="rowColor">
-<td class="colFirst"><code><a href="../../../../org/apache/tvm/Function.html" title="class in org.apache.tvm">Function</a></code></td>
-<td class="colLast"><span class="typeNameLabel">Module.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/Module.html#getFunction-java.lang.String-">getFunction</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;name)</code>&nbsp;</td>
-</tr>
 <tr class="altColor">
 <td class="colFirst"><code><a href="../../../../org/apache/tvm/Function.html" title="class in org.apache.tvm">Function</a></code></td>
 <td class="colLast"><span class="typeNameLabel">Module.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/Module.html#getFunction-java.lang.String-boolean-">getFunction</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;name,
diff --git a/docs/api/javadoc/org/apache/tvm/class-use/Module.html b/docs/api/javadoc/org/apache/tvm/class-use/Module.html
index 4ca74db..2adfa65 100644
--- a/docs/api/javadoc/org/apache/tvm/class-use/Module.html
+++ b/docs/api/javadoc/org/apache/tvm/class-use/Module.html
@@ -112,14 +112,14 @@
 <tbody>
 <tr class="altColor">
 <td class="colFirst"><code><a href="../../../../org/apache/tvm/Module.html" title="class in org.apache.tvm">Module</a></code></td>
-<td class="colLast"><span class="typeNameLabel">TVMValue.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/TVMValue.html#asModule--">asModule</a></span>()</code>&nbsp;</td>
-</tr>
-<tr class="rowColor">
-<td class="colFirst"><code><a href="../../../../org/apache/tvm/Module.html" title="class in org.apache.tvm">Module</a></code></td>
 <td class="colLast"><span class="typeNameLabel">Module.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/Module.html#asModule--">asModule</a></span>()</code>
 <div class="block">Easy for user to get the instance from returned TVMValue.</div>
 </td>
 </tr>
+<tr class="rowColor">
+<td class="colFirst"><code><a href="../../../../org/apache/tvm/Module.html" title="class in org.apache.tvm">Module</a></code></td>
+<td class="colLast"><span class="typeNameLabel">TVMValue.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/TVMValue.html#asModule--">asModule</a></span>()</code>&nbsp;</td>
+</tr>
 <tr class="altColor">
 <td class="colFirst"><code>static <a href="../../../../org/apache/tvm/Module.html" title="class in org.apache.tvm">Module</a></code></td>
 <td class="colLast"><span class="typeNameLabel">Module.</span><code><span class="memberNameLink"><a href="../../../../org/apache/tvm/Module.html#load-java.lang.String-">load</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;path)</code>&nbsp;</td>
diff --git a/docs/api/links.html b/docs/api/links.html
index 686c0ef..6261d49 100644
--- a/docs/api/links.html
+++ b/docs/api/links.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Links to Other API References &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Links to Other API References &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/auto_scheduler.html b/docs/api/python/auto_scheduler.html
index 7bf6496..72a9fc2 100644
--- a/docs/api/python/auto_scheduler.html
+++ b/docs/api/python/auto_scheduler.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.auto_scheduler &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.auto_scheduler &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -407,7 +407,7 @@ Can be the a function or the function name.</p></li>
 
 <dl class="py function">
 <dt id="tvm.auto_scheduler.auto_schedule">
-<code class="sig-prename descclassname">tvm.auto_scheduler.</code><code class="sig-name descname">auto_schedule</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">task</span></em>, <em class="sig-param"><span class="n">search_policy</span><span class="o">=</span><span class="default_value">None</span></em>, <em class="sig-param"><span class="n">tuning_options</span><span class="o">=</span><span class="default_value">auto_scheduler.TuningOptions(48909616)</span></ [...]
+<code class="sig-prename descclassname">tvm.auto_scheduler.</code><code class="sig-name descname">auto_schedule</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">task</span></em>, <em class="sig-param"><span class="n">search_policy</span><span class="o">=</span><span class="default_value">None</span></em>, <em class="sig-param"><span class="n">tuning_options</span><span class="o">=</span><span class="default_value">auto_scheduler.TuningOptions(44759744)</span></ [...]
 <dd><p>Run auto scheduling search for a task</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
@@ -775,7 +775,7 @@ This function can be used to pre-train the cost model with history log files.
 
 <dl class="py class">
 <dt id="tvm.auto_scheduler.LocalBuilder">
-<em class="property">class </em><code class="sig-prename descclassname">tvm.auto_scheduler.</code><code class="sig-name descname">LocalBuilder</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">timeout</span><span class="o">=</span><span class="default_value">15</span></em>, <em class="sig-param"><span class="n">n_parallel</span><span class="o">=</span><span class="default_value">8</span></em>, <em class="sig-param"><span class="n">build_func</span><span class="o [...]
+<em class="property">class </em><code class="sig-prename descclassname">tvm.auto_scheduler.</code><code class="sig-name descname">LocalBuilder</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">timeout</span><span class="o">=</span><span class="default_value">15</span></em>, <em class="sig-param"><span class="n">n_parallel</span><span class="o">=</span><span class="default_value">16</span></em>, <em class="sig-param"><span class="n">build_func</span><span class=" [...]
 <dd><p>LocalBuilder use local CPU cores to build programs in parallel.</p>
 <dl class="field-list simple">
 <dt class="field-odd">Parameters</dt>
@@ -1027,7 +1027,7 @@ the init state of ComputeDAG.</p>
 
 <dl class="py class">
 <dt id="tvm.auto_scheduler.SketchPolicy">
-<em class="property">class </em><code class="sig-prename descclassname">tvm.auto_scheduler.</code><code class="sig-name descname">SketchPolicy</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">task</span></em>, <em class="sig-param"><span class="n">program_cost_model</span><span class="o">=</span><span class="default_value">auto_scheduler.RandomModel(48912744)</span></em>, <em class="sig-param"><span class="n">params</span><span class="o">=</span><span class="de [...]
+<em class="property">class </em><code class="sig-prename descclassname">tvm.auto_scheduler.</code><code class="sig-name descname">SketchPolicy</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">task</span></em>, <em class="sig-param"><span class="n">program_cost_model</span><span class="o">=</span><span class="default_value">auto_scheduler.RandomModel(44967288)</span></em>, <em class="sig-param"><span class="n">params</span><span class="o">=</span><span class="de [...]
 <dd><p>The search policy that searches in a hierarchical search space defined by sketches.
 The policy randomly samples programs from the space defined by sketches and use evolutionary
 search to fine-tune them.</p>
diff --git a/docs/api/python/autotvm.html b/docs/api/python/autotvm.html
index 95e171b..942c2f5 100644
--- a/docs/api/python/autotvm.html
+++ b/docs/api/python/autotvm.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.autotvm &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.autotvm &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/contrib.html b/docs/api/python/contrib.html
index 6c84f51..f2f14a5 100644
--- a/docs/api/python/contrib.html
+++ b/docs/api/python/contrib.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.contrib &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.contrib &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -999,6 +999,28 @@ samples are drawn.</p></li>
 </dl>
 </dd></dl>
 
+<dl class="py function">
+<dt id="tvm.contrib.rocblas.batch_matmul">
+<code class="sig-prename descclassname">tvm.contrib.rocblas.</code><code class="sig-name descname">batch_matmul</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">lhs</span></em>, <em class="sig-param"><span class="n">rhs</span></em>, <em class="sig-param"><span class="n">transa</span><span class="o">=</span><span class="default_value">False</span></em>, <em class="sig-param"><span class="n">transb</span><span class="o">=</span><span class="default_value">False</ [...]
+<dd><p>Create an extern op that computes the batched matrix multiplication of lhs and rhs with rocBLAS.</p>
+<dl class="field-list simple">
+<dt class="field-odd">Parameters</dt>
+<dd class="field-odd"><ul class="simple">
+<li><p><strong>lhs</strong> (<a class="reference internal" href="te.html#tvm.te.Tensor" title="tvm.te.Tensor"><em>Tensor</em></a>) – The left batched matrix operand</p></li>
+<li><p><strong>rhs</strong> (<a class="reference internal" href="te.html#tvm.te.Tensor" title="tvm.te.Tensor"><em>Tensor</em></a>) – The right batched matrix operand</p></li>
+<li><p><strong>transa</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)"><em>bool</em></a>) – Whether to transpose lhs</p></li>
+<li><p><strong>transb</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.8)"><em>bool</em></a>) – Whether to transpose rhs</p></li>
+</ul>
+</dd>
+<dt class="field-even">Returns</dt>
+<dd class="field-even"><p><strong>C</strong> – The result tensor.</p>
+</dd>
+<dt class="field-odd">Return type</dt>
+<dd class="field-odd"><p><a class="reference internal" href="te.html#tvm.te.Tensor" title="tvm.te.Tensor">Tensor</a></p>
+</dd>
+</dl>
+</dd></dl>
+
 </div>
 <div class="section" id="module-tvm.contrib.rocm">
 <span id="tvm-contrib-rocm"></span><h2>tvm.contrib.rocm<a class="headerlink" href="#module-tvm.contrib.rocm" title="Permalink to this headline">¶</a></h2>
diff --git a/docs/api/python/driver.html b/docs/api/python/driver.html
index c184236..7167bf8 100644
--- a/docs/api/python/driver.html
+++ b/docs/api/python/driver.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.driver &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.driver &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/error.html b/docs/api/python/error.html
index c6fe211..e7e6afc 100644
--- a/docs/api/python/error.html
+++ b/docs/api/python/error.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.error &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.error &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/graph_runtime.html b/docs/api/python/graph_runtime.html
index 48f45f0..ccf0002 100644
--- a/docs/api/python/graph_runtime.html
+++ b/docs/api/python/graph_runtime.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.contrib.graph_runtime &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.contrib.graph_runtime &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/index.html b/docs/api/python/index.html
index 960f9a3..9df6427 100644
--- a/docs/api/python/index.html
+++ b/docs/api/python/index.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Python API &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Python API &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/ir.html b/docs/api/python/ir.html
index ea57bad..dcb392d 100644
--- a/docs/api/python/ir.html
+++ b/docs/api/python/ir.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.ir &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.ir &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/micro.html b/docs/api/python/micro.html
index 2f51b37..fd34846 100644
--- a/docs/api/python/micro.html
+++ b/docs/api/python/micro.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.micro &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.micro &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/ndarray.html b/docs/api/python/ndarray.html
index 8e700b1..dc6eb6b 100644
--- a/docs/api/python/ndarray.html
+++ b/docs/api/python/ndarray.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.runtime.ndarray &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.runtime.ndarray &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/relay/analysis.html b/docs/api/python/relay/analysis.html
index 7bdbb17..9e41e5e 100644
--- a/docs/api/python/relay/analysis.html
+++ b/docs/api/python/relay/analysis.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.relay.analysis &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.relay.analysis &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/relay/backend.html b/docs/api/python/relay/backend.html
index 42d79fa..fb29b23 100644
--- a/docs/api/python/relay/backend.html
+++ b/docs/api/python/relay/backend.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.relay.backend &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.relay.backend &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/relay/dataflow_pattern.html b/docs/api/python/relay/dataflow_pattern.html
index 838b140..5b05651 100644
--- a/docs/api/python/relay/dataflow_pattern.html
+++ b/docs/api/python/relay/dataflow_pattern.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.relay.dataflow_pattern &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.relay.dataflow_pattern &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/relay/frontend.html b/docs/api/python/relay/frontend.html
index deac16f..66f0e96 100644
--- a/docs/api/python/relay/frontend.html
+++ b/docs/api/python/relay/frontend.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.relay.frontend &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.relay.frontend &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/relay/image.html b/docs/api/python/relay/image.html
index 7e38265..98351c6 100644
--- a/docs/api/python/relay/image.html
+++ b/docs/api/python/relay/image.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.relay.image &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.relay.image &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/relay/index.html b/docs/api/python/relay/index.html
index 9386c23..1abdb62 100644
--- a/docs/api/python/relay/index.html
+++ b/docs/api/python/relay/index.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.relay &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.relay &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/relay/nn.html b/docs/api/python/relay/nn.html
index 6b0805e..541295c 100644
--- a/docs/api/python/relay/nn.html
+++ b/docs/api/python/relay/nn.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.relay.nn &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.relay.nn &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/relay/testing.html b/docs/api/python/relay/testing.html
index c26f152..697f6e6 100644
--- a/docs/api/python/relay/testing.html
+++ b/docs/api/python/relay/testing.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.relay.testing &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.relay.testing &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/relay/transform.html b/docs/api/python/relay/transform.html
index e35485a..967702a 100644
--- a/docs/api/python/relay/transform.html
+++ b/docs/api/python/relay/transform.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.relay.transform &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.relay.transform &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/relay/vision.html b/docs/api/python/relay/vision.html
index 0269331..901beec 100644
--- a/docs/api/python/relay/vision.html
+++ b/docs/api/python/relay/vision.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.relay.vision &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.relay.vision &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/rpc.html b/docs/api/python/rpc.html
index 4a408d1..5e72180 100644
--- a/docs/api/python/rpc.html
+++ b/docs/api/python/rpc.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.rpc &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.rpc &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/runtime.html b/docs/api/python/runtime.html
index a0ecf5a..61d41b7 100644
--- a/docs/api/python/runtime.html
+++ b/docs/api/python/runtime.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.runtime &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.runtime &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/target.html b/docs/api/python/target.html
index 974212f..cfe6fc7 100644
--- a/docs/api/python/target.html
+++ b/docs/api/python/target.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.target &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.target &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/te.html b/docs/api/python/te.html
index 0ab299d..402aff0 100644
--- a/docs/api/python/te.html
+++ b/docs/api/python/te.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.te &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.te &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/tir.html b/docs/api/python/tir.html
index 69a667f..96f88ad 100644
--- a/docs/api/python/tir.html
+++ b/docs/api/python/tir.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.tir &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.tir &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/topi.html b/docs/api/python/topi.html
index f463e7d..530e078 100644
--- a/docs/api/python/topi.html
+++ b/docs/api/python/topi.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>tvm.topi &mdash; tvm 0.7.dev1 documentation</title>
+  <title>tvm.topi &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/python/vta/index.html b/docs/api/python/vta/index.html
index 8d99451..1fc525f 100644
--- a/docs/api/python/vta/index.html
+++ b/docs/api/python/vta/index.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>vta &mdash; tvm 0.7.dev1 documentation</title>
+  <title>vta &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/api/typedoc/assets/js/main.js b/docs/api/typedoc/assets/js/main.js
index 39a8066..fe9fac3 100644
--- a/docs/api/typedoc/assets/js/main.js
+++ b/docs/api/typedoc/assets/js/main.js
@@ -1 +1 @@
-!function(){var e=function(t){var r=new e.Builder;return r.pipeline.add(e.trimmer,e.stopWordFilter,e.stemmer),r.searchPipeline.add(e.stemmer),t.call(r,r),r.build()};e.version="2.3.7",e.utils={},e.utils.warn=function(e){return function(t){e.console&&console.warn&&console.warn(t)}}(this),e.utils.asString=function(e){return null==e?"":e.toString()},e.utils.clone=function(e){if(null==e)return e;for(var t=Object.create(null),r=Object.keys(e),i=0;i<r.length;i++){var n=r[i],s=e[n];if(Array.isAr [...]
\ No newline at end of file
+!function(){var e=function(t){var r=new e.Builder;return r.pipeline.add(e.trimmer,e.stopWordFilter,e.stemmer),r.searchPipeline.add(e.stemmer),t.call(r,r),r.build()};e.version="2.3.7",e.utils={},e.utils.warn=function(e){return function(t){e.console&&console.warn&&console.warn(t)}}(this),e.utils.asString=function(e){return null==e?"":e.toString()},e.utils.clone=function(e){if(null==e)return e;for(var t=Object.create(null),r=Object.keys(e),i=0;i<r.length;i++){var n=r[i],s=e[n];if(Array.isAr [...]
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/bytestreamreader.html b/docs/api/typedoc/classes/bytestreamreader.html
index 94cab45..2b20bd5 100644
--- a/docs/api/typedoc/classes/bytestreamreader.html
+++ b/docs/api/typedoc/classes/bytestreamreader.html
@@ -119,7 +119,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -141,7 +141,7 @@
 					<div class="tsd-signature tsd-kind-icon">bytes<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Uint8Array</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -151,7 +151,7 @@
 					<div class="tsd-signature tsd-kind-icon">offset<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 0</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L42">rpc_server.ts:42</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L42">rpc_server.ts:42</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -168,7 +168,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L63">rpc_server.ts:63</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L63">rpc_server.ts:63</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">Uint8Array</span></h4>
@@ -185,7 +185,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L49">rpc_server.ts:49</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L49">rpc_server.ts:49</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
@@ -202,7 +202,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L57">rpc_server.ts:57</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L57">rpc_server.ts:57</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
@@ -273,5 +273,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/cachedcallstack.html b/docs/api/typedoc/classes/cachedcallstack.html
index c49bfe5..7202461 100644
--- a/docs/api/typedoc/classes/cachedcallstack.html
+++ b/docs/api/typedoc/classes/cachedcallstack.html
@@ -144,7 +144,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L223">memory.ts:223</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L223">memory.ts:223</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -172,7 +172,7 @@
 					<div class="tsd-signature tsd-kind-icon">temp<wbr>Args<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><a href="../interfaces/disposable.html" class="tsd-signature-type">Disposable</a><span class="tsd-signature-symbol">&gt;</span><span class="tsd-signature-symbol"> = []</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L208">memory.ts:208</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L208">memory.ts:208</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -194,7 +194,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L312">memory.ts:312</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L312">memory.ts:312</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -226,7 +226,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L284">memory.ts:284</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L284">memory.ts:284</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -262,7 +262,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L388">memory.ts:388</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L388">memory.ts:388</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -300,7 +300,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L376">memory.ts:376</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L376">memory.ts:376</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -340,7 +340,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L267">memory.ts:267</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L267">memory.ts:267</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -373,7 +373,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L243">memory.ts:243</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L243">memory.ts:243</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -390,7 +390,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L321">memory.ts:321</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L321">memory.ts:321</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -422,7 +422,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L252">memory.ts:252</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L252">memory.ts:252</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -444,7 +444,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L359">memory.ts:359</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L359">memory.ts:359</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -470,7 +470,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L342">memory.ts:342</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L342">memory.ts:342</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -496,7 +496,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L350">memory.ts:350</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L350">memory.ts:350</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -522,7 +522,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L326">memory.ts:326</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L326">memory.ts:326</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -548,7 +548,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L363">memory.ts:363</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L363">memory.ts:363</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -574,7 +574,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L346">memory.ts:346</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L346">memory.ts:346</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -600,7 +600,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L334">memory.ts:334</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L334">memory.ts:334</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -713,5 +713,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/dlcontext.html b/docs/api/typedoc/classes/dlcontext.html
index 8a0dbf4..d6e90ac 100644
--- a/docs/api/typedoc/classes/dlcontext.html
+++ b/docs/api/typedoc/classes/dlcontext.html
@@ -118,7 +118,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L203">runtime.ts:203</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L203">runtime.ts:203</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -146,7 +146,7 @@
 					<div class="tsd-signature tsd-kind-icon">device<wbr>Id<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L201">runtime.ts:201</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L201">runtime.ts:201</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -161,7 +161,7 @@
 					<div class="tsd-signature tsd-kind-icon">device<wbr>Type<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L199">runtime.ts:199</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L199">runtime.ts:199</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -183,7 +183,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L224">runtime.ts:224</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L224">runtime.ts:224</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -205,7 +205,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L231">runtime.ts:231</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L231">runtime.ts:231</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">string</span></h4>
@@ -273,5 +273,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/dldatatype.html b/docs/api/typedoc/classes/dldatatype.html
index 4abbd67..7d6bd31 100644
--- a/docs/api/typedoc/classes/dldatatype.html
+++ b/docs/api/typedoc/classes/dldatatype.html
@@ -119,7 +119,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L263">runtime.ts:263</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L263">runtime.ts:263</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -147,7 +147,7 @@
 					<div class="tsd-signature tsd-kind-icon">bits<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L261">runtime.ts:261</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L261">runtime.ts:261</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -162,7 +162,7 @@
 					<div class="tsd-signature tsd-kind-icon">code<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L259">runtime.ts:259</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L259">runtime.ts:259</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -177,7 +177,7 @@
 					<div class="tsd-signature tsd-kind-icon">lanes<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L263">runtime.ts:263</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L263">runtime.ts:263</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -199,7 +199,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L280">runtime.ts:280</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L280">runtime.ts:280</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
@@ -216,7 +216,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L271">runtime.ts:271</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L271">runtime.ts:271</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">string</span></h4>
@@ -287,5 +287,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/environment.html b/docs/api/typedoc/classes/environment.html
index 99184ca..2f66340 100644
--- a/docs/api/typedoc/classes/environment.html
+++ b/docs/api/typedoc/classes/environment.html
@@ -125,7 +125,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/environment.ts#L86">environment.ts:86</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/environment.ts#L86">environment.ts:86</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -169,7 +169,7 @@
 					<aside class="tsd-sources">
 						<p>Implementation of <a href="../interfaces/libraryprovider.html">LibraryProvider</a>.<a href="../interfaces/libraryprovider.html#imports">imports</a></p>
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/environment.ts#L70">environment.ts:70</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/environment.ts#L70">environment.ts:70</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -179,7 +179,7 @@
 					<div class="tsd-signature tsd-kind-icon">logger<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>msg<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">void</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/environment.ts#L69">environment.ts:69</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/environment.ts#L69">environment.ts:69</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-type-declaration">
@@ -210,7 +210,7 @@
 					<div class="tsd-signature tsd-kind-icon">packedCFunc<wbr>Table<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">ctypes.FTVMWasmPackedCFunc</span><span class="tsd-signature-symbol"> | </span><span class="tsd-signature-type">undefined</span><span class="tsd-signature-symbol">&gt;</span><span class="tsd-signature-symbol"> = [undefined,]</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/environment.ts#L78">environment.ts:78</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/environment.ts#L78">environment.ts:78</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -228,7 +228,7 @@
 					<div class="tsd-signature tsd-kind-icon">packedCFunc<wbr>Table<wbr>Free<wbr>Id<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">number</span><span class="tsd-signature-symbol">&gt;</span><span class="tsd-signature-symbol"> = []</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/environment.ts#L84">environment.ts:84</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/environment.ts#L84">environment.ts:84</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -250,7 +250,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/environment.ts#L105">environment.ts:105</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/environment.ts#L105">environment.ts:105</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -332,5 +332,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/ffilibrary.html b/docs/api/typedoc/classes/ffilibrary.html
index 1ac18a8..3a4dfb3 100644
--- a/docs/api/typedoc/classes/ffilibrary.html
+++ b/docs/api/typedoc/classes/ffilibrary.html
@@ -131,7 +131,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L49">runtime.ts:49</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L49">runtime.ts:49</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -156,7 +156,7 @@
 					<div class="tsd-signature tsd-kind-icon">exports<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Record</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">, </span><span class="tsd-signature-type">Function</span><span class="tsd-signature-symbol">&gt;</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L46">runtime.ts:46</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L46">runtime.ts:46</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -166,7 +166,7 @@
 					<div class="tsd-signature tsd-kind-icon">memory<span class="tsd-signature-symbol">:</span> <a href="memory.html" class="tsd-signature-type">Memory</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L45">runtime.ts:45</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L45">runtime.ts:45</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -176,7 +176,7 @@
 					<div class="tsd-signature tsd-kind-icon">wasm32<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">boolean</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L44">runtime.ts:44</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L44">runtime.ts:44</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -186,7 +186,7 @@
 					<div class="tsd-signature tsd-kind-icon">webGPUContext<span class="tsd-signature-symbol">:</span> <a href="webgpucontext.html" class="tsd-signature-type">WebGPUContext</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L47">runtime.ts:47</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L47">runtime.ts:47</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -203,7 +203,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L76">runtime.ts:76</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L76">runtime.ts:76</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -226,7 +226,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L66">runtime.ts:66</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L66">runtime.ts:66</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -243,7 +243,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L84">runtime.ts:84</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L84">runtime.ts:84</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <a href="cachedcallstack.html" class="tsd-signature-type">CachedCallStack</a></h4>
@@ -260,7 +260,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L95">runtime.ts:95</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L95">runtime.ts:95</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -283,7 +283,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L72">runtime.ts:72</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L72">runtime.ts:72</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
@@ -366,5 +366,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/graphruntime.html b/docs/api/typedoc/classes/graphruntime.html
index f67104a..1bd8d04 100644
--- a/docs/api/typedoc/classes/graphruntime.html
+++ b/docs/api/typedoc/classes/graphruntime.html
@@ -130,7 +130,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L584">runtime.ts:584</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L584">runtime.ts:584</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -162,7 +162,7 @@
 					<div class="tsd-signature tsd-kind-icon">module<span class="tsd-signature-symbol">:</span> <a href="module.html" class="tsd-signature-type">Module</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L580">runtime.ts:580</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L580">runtime.ts:580</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -179,7 +179,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L655">runtime.ts:655</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L655">runtime.ts:655</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -224,7 +224,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L598">runtime.ts:598</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L598">runtime.ts:598</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -241,7 +241,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L632">runtime.ts:632</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L632">runtime.ts:632</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -279,7 +279,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L645">runtime.ts:645</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L645">runtime.ts:645</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -310,7 +310,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L622">runtime.ts:622</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L622">runtime.ts:622</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -332,7 +332,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L610">runtime.ts:610</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L610">runtime.ts:610</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -429,5 +429,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/instance.html b/docs/api/typedoc/classes/instance.html
index bd6b3f7..d94e570 100644
--- a/docs/api/typedoc/classes/instance.html
+++ b/docs/api/typedoc/classes/instance.html
@@ -139,7 +139,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L693">runtime.ts:693</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L693">runtime.ts:693</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -202,7 +202,7 @@
 					<div class="tsd-signature tsd-kind-icon">exports<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Record</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">, </span><span class="tsd-signature-type">Function</span><span class="tsd-signature-symbol">&gt;</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L685">runtime.ts:685</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L685">runtime.ts:685</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -212,7 +212,7 @@
 					<div class="tsd-signature tsd-kind-icon">memory<span class="tsd-signature-symbol">:</span> <a href="memory.html" class="tsd-signature-type">Memory</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L684">runtime.ts:684</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L684">runtime.ts:684</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -229,7 +229,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L925">runtime.ts:925</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L925">runtime.ts:925</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -267,7 +267,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L933">runtime.ts:933</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L933">runtime.ts:933</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -298,7 +298,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L995">runtime.ts:995</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L995">runtime.ts:995</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -341,7 +341,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L733">runtime.ts:733</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L733">runtime.ts:733</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -358,7 +358,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L953">runtime.ts:953</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L953">runtime.ts:953</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -402,7 +402,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L817">runtime.ts:817</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L817">runtime.ts:817</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -434,7 +434,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L1038">runtime.ts:1038</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L1038">runtime.ts:1038</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -465,7 +465,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L847">runtime.ts:847</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L847">runtime.ts:847</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -497,7 +497,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L751">runtime.ts:751</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L751">runtime.ts:751</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -520,7 +520,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L1018">runtime.ts:1018</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L1018">runtime.ts:1018</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -568,7 +568,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L790">runtime.ts:790</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L790">runtime.ts:790</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -608,7 +608,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L915">runtime.ts:915</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L915">runtime.ts:915</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -646,7 +646,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L1139">runtime.ts:1139</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L1139">runtime.ts:1139</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -698,7 +698,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L741">runtime.ts:741</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L741">runtime.ts:741</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -722,7 +722,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L869">runtime.ts:869</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L869">runtime.ts:869</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -754,7 +754,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L858">runtime.ts:858</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L858">runtime.ts:858</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -786,7 +786,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L941">runtime.ts:941</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L941">runtime.ts:941</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -913,5 +913,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/memory.html b/docs/api/typedoc/classes/memory.html
index 77ebabc..7b9a6f2 100644
--- a/docs/api/typedoc/classes/memory.html
+++ b/docs/api/typedoc/classes/memory.html
@@ -130,7 +130,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L40">memory.ts:40</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L40">memory.ts:40</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -152,7 +152,7 @@
 					<div class="tsd-signature tsd-kind-icon">memory<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Memory</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L32">memory.ts:32</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L32">memory.ts:32</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -162,7 +162,7 @@
 					<div class="tsd-signature tsd-kind-icon">wasm32<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">boolean</span><span class="tsd-signature-symbol"> = true</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L33">memory.ts:33</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L33">memory.ts:33</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -179,7 +179,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L154">memory.ts:154</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L154">memory.ts:154</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -210,7 +210,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L90">memory.ts:90</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L90">memory.ts:90</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -233,7 +233,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L97">memory.ts:97</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L97">memory.ts:97</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -256,7 +256,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L74">memory.ts:74</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L74">memory.ts:74</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -279,7 +279,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L81">memory.ts:81</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L81">memory.ts:81</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -302,7 +302,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L104">memory.ts:104</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L104">memory.ts:104</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -325,7 +325,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L132">memory.ts:132</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L132">memory.ts:132</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -362,7 +362,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L145">memory.ts:145</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L145">memory.ts:145</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -393,7 +393,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L60">memory.ts:60</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L60">memory.ts:60</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -416,7 +416,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L67">memory.ts:67</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L67">memory.ts:67</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -439,7 +439,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L53">memory.ts:53</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L53">memory.ts:53</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -462,7 +462,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L114">memory.ts:114</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L114">memory.ts:114</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -485,7 +485,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L124">memory.ts:124</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L124">memory.ts:124</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
@@ -502,7 +502,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/memory.ts#L175">memory.ts:175</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/memory.ts#L175">memory.ts:175</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -626,5 +626,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/module.html b/docs/api/typedoc/classes/module.html
index 29deab2..fc84b58 100644
--- a/docs/api/typedoc/classes/module.html
+++ b/docs/api/typedoc/classes/module.html
@@ -124,7 +124,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L505">runtime.ts:505</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L505">runtime.ts:505</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -170,7 +170,7 @@
 					<div class="tsd-signature tsd-kind-icon">handle<span class="tsd-signature-symbol">:</span> <a href="../index.html#pointer" class="tsd-signature-type">Pointer</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L503">runtime.ts:503</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L503">runtime.ts:503</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -187,7 +187,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L517">runtime.ts:517</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L517">runtime.ts:517</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -204,7 +204,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L531">runtime.ts:531</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L531">runtime.ts:531</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -236,7 +236,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L562">runtime.ts:562</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L562">runtime.ts:562</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -318,5 +318,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/ndarray.html b/docs/api/typedoc/classes/ndarray.html
index e738715..bd05bc1 100644
--- a/docs/api/typedoc/classes/ndarray.html
+++ b/docs/api/typedoc/classes/ndarray.html
@@ -130,7 +130,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L305">runtime.ts:305</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L305">runtime.ts:305</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -158,7 +158,7 @@
 					<div class="tsd-signature tsd-kind-icon">context<span class="tsd-signature-symbol">:</span> <a href="dlcontext.html" class="tsd-signature-type">DLContext</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L298">runtime.ts:298</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L298">runtime.ts:298</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -173,7 +173,7 @@
 					<div class="tsd-signature tsd-kind-icon">dtype<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L294">runtime.ts:294</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L294">runtime.ts:294</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -188,7 +188,7 @@
 					<div class="tsd-signature tsd-kind-icon">handle<span class="tsd-signature-symbol">:</span> <a href="../index.html#pointer" class="tsd-signature-type">Pointer</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L290">runtime.ts:290</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L290">runtime.ts:290</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -203,7 +203,7 @@
 					<div class="tsd-signature tsd-kind-icon">ndim<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L292">runtime.ts:292</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L292">runtime.ts:292</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -218,7 +218,7 @@
 					<div class="tsd-signature tsd-kind-icon">shape<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">number</span><span class="tsd-signature-symbol">&gt;</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L296">runtime.ts:296</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L296">runtime.ts:296</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -240,7 +240,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L371">runtime.ts:371</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L371">runtime.ts:371</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -273,7 +273,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L415">runtime.ts:415</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L415">runtime.ts:415</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -305,7 +305,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L356">runtime.ts:356</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L356">runtime.ts:356</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -322,7 +322,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L475">runtime.ts:475</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L475">runtime.ts:475</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -346,7 +346,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L444">runtime.ts:444</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L444">runtime.ts:444</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -438,5 +438,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/packedfunccell.html b/docs/api/typedoc/classes/packedfunccell.html
index ee65bd3..0e119a8 100644
--- a/docs/api/typedoc/classes/packedfunccell.html
+++ b/docs/api/typedoc/classes/packedfunccell.html
@@ -122,7 +122,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L158">runtime.ts:158</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L158">runtime.ts:158</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -147,7 +147,7 @@
 					<div class="tsd-signature tsd-kind-icon">handle<span class="tsd-signature-symbol">:</span> <a href="../index.html#pointer" class="tsd-signature-type">Pointer</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L157">runtime.ts:157</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L157">runtime.ts:157</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -164,7 +164,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L165">runtime.ts:165</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L165">runtime.ts:165</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -226,5 +226,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/rpcserver.html b/docs/api/typedoc/classes/rpcserver.html
index 396bf75..28cf848 100644
--- a/docs/api/typedoc/classes/rpcserver.html
+++ b/docs/api/typedoc/classes/rpcserver.html
@@ -115,7 +115,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L92">rpc_server.ts:92</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L92">rpc_server.ts:92</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -176,7 +176,7 @@
 					<div class="tsd-signature tsd-kind-icon">get<wbr>Imports<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">Record</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">, </span><span class="tsd-signature-type">unknown</span><span class="tsd-signat [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L82">rpc_server.ts:82</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L82">rpc_server.ts:82</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-type-declaration">
@@ -201,7 +201,7 @@
 					<div class="tsd-signature tsd-kind-icon">key<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L78">rpc_server.ts:78</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L78">rpc_server.ts:78</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -211,7 +211,7 @@
 					<div class="tsd-signature tsd-kind-icon">logger<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>msg<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">void</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L81">rpc_server.ts:81</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L81">rpc_server.ts:81</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-type-declaration">
@@ -242,7 +242,7 @@
 					<div class="tsd-signature tsd-kind-icon">socket<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">WebSocket</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L79">rpc_server.ts:79</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L79">rpc_server.ts:79</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -252,7 +252,7 @@
 					<div class="tsd-signature tsd-kind-icon">state<span class="tsd-signature-symbol">:</span> <a href="../enums/rpcserverstate.html" class="tsd-signature-type">RPCServerState</a><span class="tsd-signature-symbol"> = RPCServerState.InitHeader</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L80">rpc_server.ts:80</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L80">rpc_server.ts:80</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -262,7 +262,7 @@
 					<div class="tsd-signature tsd-kind-icon">url<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L77">rpc_server.ts:77</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L77">rpc_server.ts:77</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -333,5 +333,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/scalar.html b/docs/api/typedoc/classes/scalar.html
index ebe2f06..88b49f7 100644
--- a/docs/api/typedoc/classes/scalar.html
+++ b/docs/api/typedoc/classes/scalar.html
@@ -112,7 +112,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L145">runtime.ts:145</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L145">runtime.ts:145</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -137,7 +137,7 @@
 					<div class="tsd-signature tsd-kind-icon">dtype<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L145">runtime.ts:145</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L145">runtime.ts:145</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -152,7 +152,7 @@
 					<div class="tsd-signature tsd-kind-icon">value<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L143">runtime.ts:143</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L143">runtime.ts:143</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -216,5 +216,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/classes/webgpucontext.html b/docs/api/typedoc/classes/webgpucontext.html
index d0d5adc..8830303 100644
--- a/docs/api/typedoc/classes/webgpucontext.html
+++ b/docs/api/typedoc/classes/webgpucontext.html
@@ -120,7 +120,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L57">webgpu.ts:57</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L57">webgpu.ts:57</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -145,7 +145,7 @@
 					<div class="tsd-signature tsd-kind-icon">device<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">GPUDevice</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L50">webgpu.ts:50</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L50">webgpu.ts:50</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -155,7 +155,7 @@
 					<div class="tsd-signature tsd-kind-icon">memory<span class="tsd-signature-symbol">:</span> <a href="memory.html" class="tsd-signature-type">Memory</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L51">webgpu.ts:51</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L51">webgpu.ts:51</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -172,7 +172,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L84">webgpu.ts:84</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L84">webgpu.ts:84</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -209,7 +209,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L170">webgpu.ts:170</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L170">webgpu.ts:170</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -238,7 +238,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L67">webgpu.ts:67</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L67">webgpu.ts:67</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -314,5 +314,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/enums/argtypecode.html b/docs/api/typedoc/enums/argtypecode.html
index 6a6b5da..2f75b3a 100644
--- a/docs/api/typedoc/enums/argtypecode.html
+++ b/docs/api/typedoc/enums/argtypecode.html
@@ -106,7 +106,7 @@
 					<div class="tsd-signature tsd-kind-icon">Float<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 2</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L216">ctypes.ts:216</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L216">ctypes.ts:216</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -116,7 +116,7 @@
 					<div class="tsd-signature tsd-kind-icon">Int<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 0</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L214">ctypes.ts:214</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L214">ctypes.ts:214</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -126,7 +126,7 @@
 					<div class="tsd-signature tsd-kind-icon">Null<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 4</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L218">ctypes.ts:218</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L218">ctypes.ts:218</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -136,7 +136,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMBytes<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 12</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L226">ctypes.ts:226</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L226">ctypes.ts:226</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -146,7 +146,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMContext<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 6</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L220">ctypes.ts:220</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L220">ctypes.ts:220</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -156,7 +156,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMDLTensor<wbr>Handle<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 7</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L221">ctypes.ts:221</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L221">ctypes.ts:221</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -166,7 +166,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMData<wbr>Type<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 5</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L219">ctypes.ts:219</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L219">ctypes.ts:219</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -176,7 +176,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMModule<wbr>Handle<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 9</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L223">ctypes.ts:223</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L223">ctypes.ts:223</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -186,7 +186,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMNDArray<wbr>Handle<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 13</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L227">ctypes.ts:227</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L227">ctypes.ts:227</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -196,7 +196,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMObject<wbr>Handle<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 8</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L222">ctypes.ts:222</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L222">ctypes.ts:222</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -206,7 +206,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMObjectRValue<wbr>Ref<wbr>Arg<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 14</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L228">ctypes.ts:228</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L228">ctypes.ts:228</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -216,7 +216,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMOpaque<wbr>Handle<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 3</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L217">ctypes.ts:217</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L217">ctypes.ts:217</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -226,7 +226,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMPacked<wbr>Func<wbr>Handle<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 10</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L224">ctypes.ts:224</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L224">ctypes.ts:224</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -236,7 +236,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMStr<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 11</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L225">ctypes.ts:225</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L225">ctypes.ts:225</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -246,7 +246,7 @@
 					<div class="tsd-signature tsd-kind-icon">UInt<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 1</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L215">ctypes.ts:215</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L215">ctypes.ts:215</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -341,5 +341,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/enums/aynccallbackcode.html b/docs/api/typedoc/enums/aynccallbackcode.html
index 776ca27..0c86bce 100644
--- a/docs/api/typedoc/enums/aynccallbackcode.html
+++ b/docs/api/typedoc/enums/aynccallbackcode.html
@@ -93,7 +93,7 @@
 					<div class="tsd-signature tsd-kind-icon">k<wbr>Exception<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 5</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L677">runtime.ts:677</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L677">runtime.ts:677</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -103,7 +103,7 @@
 					<div class="tsd-signature tsd-kind-icon">k<wbr>Return<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 4</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L676">runtime.ts:676</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L676">runtime.ts:676</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -159,5 +159,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/enums/dldatatypecode.html b/docs/api/typedoc/enums/dldatatypecode.html
index 0de2a8b..1b01631 100644
--- a/docs/api/typedoc/enums/dldatatypecode.html
+++ b/docs/api/typedoc/enums/dldatatypecode.html
@@ -95,7 +95,7 @@
 					<div class="tsd-signature tsd-kind-icon">Float<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 2</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L243">runtime.ts:243</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L243">runtime.ts:243</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -105,7 +105,7 @@
 					<div class="tsd-signature tsd-kind-icon">Int<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 0</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L241">runtime.ts:241</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L241">runtime.ts:241</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -115,7 +115,7 @@
 					<div class="tsd-signature tsd-kind-icon">Opaque<wbr>Handle<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 3</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L244">runtime.ts:244</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L244">runtime.ts:244</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -125,7 +125,7 @@
 					<div class="tsd-signature tsd-kind-icon">UInt<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 1</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L242">runtime.ts:242</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L242">runtime.ts:242</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -187,5 +187,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/enums/rpcserverstate.html b/docs/api/typedoc/enums/rpcserverstate.html
index 0627129..ce773f5 100644
--- a/docs/api/typedoc/enums/rpcserverstate.html
+++ b/docs/api/typedoc/enums/rpcserverstate.html
@@ -90,7 +90,7 @@
 					<div class="tsd-signature tsd-kind-icon">Init<wbr>Header<span class="tsd-signature-symbol">:</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L27">rpc_server.ts:27</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L27">rpc_server.ts:27</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -100,7 +100,7 @@
 					<div class="tsd-signature tsd-kind-icon">Init<wbr>Header<wbr>Key<span class="tsd-signature-symbol">:</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L28">rpc_server.ts:28</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L28">rpc_server.ts:28</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -110,7 +110,7 @@
 					<div class="tsd-signature tsd-kind-icon">Init<wbr>Server<span class="tsd-signature-symbol">:</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L29">rpc_server.ts:29</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L29">rpc_server.ts:29</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -120,7 +120,7 @@
 					<div class="tsd-signature tsd-kind-icon">Receive<wbr>Packet<wbr>Body<span class="tsd-signature-symbol">:</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L32">rpc_server.ts:32</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L32">rpc_server.ts:32</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -130,7 +130,7 @@
 					<div class="tsd-signature tsd-kind-icon">Receive<wbr>Packet<wbr>Header<span class="tsd-signature-symbol">:</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L31">rpc_server.ts:31</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L31">rpc_server.ts:31</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -140,7 +140,7 @@
 					<div class="tsd-signature tsd-kind-icon">Wait<wbr>For<wbr>Callback<span class="tsd-signature-symbol">:</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L30">rpc_server.ts:30</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L30">rpc_server.ts:30</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -208,5 +208,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/enums/sizeof.html b/docs/api/typedoc/enums/sizeof.html
index 257803c..56d9777 100644
--- a/docs/api/typedoc/enums/sizeof.html
+++ b/docs/api/typedoc/enums/sizeof.html
@@ -100,7 +100,7 @@
 					<div class="tsd-signature tsd-kind-icon">DLContext<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = I32 + I32</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L207">ctypes.ts:207</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L207">ctypes.ts:207</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -110,7 +110,7 @@
 					<div class="tsd-signature tsd-kind-icon">DLData<wbr>Type<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = I32</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L206">ctypes.ts:206</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L206">ctypes.ts:206</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -120,7 +120,7 @@
 					<div class="tsd-signature tsd-kind-icon">F32<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 4</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L203">ctypes.ts:203</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L203">ctypes.ts:203</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -130,7 +130,7 @@
 					<div class="tsd-signature tsd-kind-icon">F64<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 8</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L204">ctypes.ts:204</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L204">ctypes.ts:204</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -140,7 +140,7 @@
 					<div class="tsd-signature tsd-kind-icon">I32<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 4</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L201">ctypes.ts:201</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L201">ctypes.ts:201</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -150,7 +150,7 @@
 					<div class="tsd-signature tsd-kind-icon">I64<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 8</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L202">ctypes.ts:202</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L202">ctypes.ts:202</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -160,7 +160,7 @@
 					<div class="tsd-signature tsd-kind-icon">TVMValue<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 8</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L205">ctypes.ts:205</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L205">ctypes.ts:205</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -170,7 +170,7 @@
 					<div class="tsd-signature tsd-kind-icon">U16<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 2</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L200">ctypes.ts:200</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L200">ctypes.ts:200</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -180,7 +180,7 @@
 					<div class="tsd-signature tsd-kind-icon">U8<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol"> = 1</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L199">ctypes.ts:199</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L199">ctypes.ts:199</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -257,5 +257,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/index.html b/docs/api/typedoc/index.html
index 98b9c52..20078b6 100644
--- a/docs/api/typedoc/index.html
+++ b/docs/api/typedoc/index.html
@@ -174,7 +174,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMArray<wbr>Alloc<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>shape<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, ndim<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">number</span>, dtypeCode<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">number</span>, dtypeBits<span class="tsd [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L112">ctypes.ts:112</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L112">ctypes.ts:112</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -238,7 +238,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMArray<wbr>Copy<wbr>From<wbr>Bytes<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>handle<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, data<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, nbytes<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">num [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L128">ctypes.ts:128</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L128">ctypes.ts:128</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -282,7 +282,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMArray<wbr>Copy<wbr>From<wbr>To<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>from<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, to<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, stream<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-sig [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L144">ctypes.ts:144</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L144">ctypes.ts:144</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -326,7 +326,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMArray<wbr>Copy<wbr>ToBytes<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>handle<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, data<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, nbytes<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">number</sp [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L136">ctypes.ts:136</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L136">ctypes.ts:136</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -370,7 +370,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMArray<wbr>Free<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>handle<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L121">ctypes.ts:121</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L121">ctypes.ts:121</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -406,7 +406,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMBackend<wbr>PackedCFunc<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>argValues<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, argCodes<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, nargs<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">number< [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L160">ctypes.ts:160</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L160">ctypes.ts:160</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -458,7 +458,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMCFunc<wbr>Set<wbr>Return<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>ret<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, value<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, typeCode<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signa [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L77">ctypes.ts:77</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L77">ctypes.ts:77</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -506,7 +506,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMCb<wbr>Arg<wbr>ToReturn<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>value<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, code<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span c [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L83">ctypes.ts:83</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L83">ctypes.ts:83</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -545,7 +545,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMFunc<wbr>Call<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>func<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, argValues<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, typeCode<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-t [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L67">ctypes.ts:67</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L67">ctypes.ts:67</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -601,7 +601,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMFunc<wbr>Free<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>func<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L57">ctypes.ts:57</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L57">ctypes.ts:57</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -637,7 +637,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMFunc<wbr>Get<wbr>Global<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>name<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, out<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span cla [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L100">ctypes.ts:100</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L100">ctypes.ts:100</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -676,7 +676,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMFunc<wbr>List<wbr>Global<wbr>Names<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>outSize<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, outArray<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&g [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L88">ctypes.ts:88</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L88">ctypes.ts:88</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -715,7 +715,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMFunc<wbr>Register<wbr>Global<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>name<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, f<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, override<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">number</spa [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L94">ctypes.ts:94</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L94">ctypes.ts:94</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -758,7 +758,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMGet<wbr>Last<wbr>Error<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L34">ctypes.ts:34</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L34">ctypes.ts:34</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -788,7 +788,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMMod<wbr>Free<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>mod<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L52">ctypes.ts:52</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L52">ctypes.ts:52</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -824,7 +824,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMMod<wbr>Get<wbr>Function<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>mod<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, funcName<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, queryImports<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">numbe [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L42">ctypes.ts:42</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L42">ctypes.ts:42</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -872,7 +872,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMMod<wbr>Import<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>mod<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, dep<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-si [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L48">ctypes.ts:48</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L48">ctypes.ts:48</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -912,7 +912,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMSynchronize<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>deviceType<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">number</span>, deviceId<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">number</span>, stream<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signatur [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L150">ctypes.ts:150</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L150">ctypes.ts:150</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -954,7 +954,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMWasm<wbr>Alloc<wbr>Space<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>size<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">number</span><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L167">ctypes.ts:167</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L167">ctypes.ts:167</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -990,7 +990,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMWasm<wbr>Free<wbr>Space<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>ptr<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">void</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L170">ctypes.ts:170</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L170">ctypes.ts:170</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -1026,7 +1026,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMWasm<wbr>Func<wbr>Create<wbr>FromCFunc<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>resource<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, out<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&g [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L187">ctypes.ts:187</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L187">ctypes.ts:187</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -1066,7 +1066,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMWasm<wbr>PackedCFunc<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>args<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, typeCodes<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a>, nargs<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">number</span>, [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L179">ctypes.ts:179</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L179">ctypes.ts:179</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -1118,7 +1118,7 @@
 					<div class="tsd-signature tsd-kind-icon">FTVMWasm<wbr>PackedCFunc<wbr>Finalizer<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>resourceHandle<span class="tsd-signature-symbol">: </span><a href="index.html#pointer" class="tsd-signature-type">Pointer</a><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">void</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L193">ctypes.ts:193</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L193">ctypes.ts:193</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -1154,7 +1154,7 @@
 					<div class="tsd-signature tsd-kind-icon">GPUPointer<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L25">webgpu.ts:25</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L25">webgpu.ts:25</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -1169,7 +1169,7 @@
 					<div class="tsd-signature tsd-kind-icon">Packed<wbr>Func<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span><span class="tsd-signature-symbol">...</span>args<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">any</span><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">any</span><span class="tsd-signature-symbol"> &amp; </span><a href="interfaces/disp [...]
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L36">runtime.ts:36</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L36">runtime.ts:36</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -1184,7 +1184,7 @@
 					<div class="tsd-signature tsd-kind-icon">Pointer<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L25">ctypes.ts:25</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L25">ctypes.ts:25</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -1199,7 +1199,7 @@
 					<div class="tsd-signature tsd-kind-icon">Ptr<wbr>Offset<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/ctypes.ts#L28">ctypes.ts:28</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/ctypes.ts#L28">ctypes.ts:28</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -1217,7 +1217,7 @@
 					<div class="tsd-signature tsd-kind-icon">RPC_<wbr>MAGIC<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">1045105</span><span class="tsd-signature-symbol"> = 1045105</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/rpc_server.ts#L36">rpc_server.ts:36</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/rpc_server.ts#L36">rpc_server.ts:36</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -1239,7 +1239,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/support.ts#L25">support.ts:25</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/support.ts#L25">support.ts:25</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -1271,7 +1271,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/support.ts#L39">support.ts:39</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/support.ts#L39">support.ts:39</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -1300,7 +1300,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/support.ts#L52">support.ts:52</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/support.ts#L52">support.ts:52</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -1337,7 +1337,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/compact.ts#L38">compact.ts:38</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/compact.ts#L38">compact.ts:38</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -1368,7 +1368,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L30">webgpu.ts:30</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L30">webgpu.ts:30</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -1390,7 +1390,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/environment.ts#L32">environment.ts:32</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/environment.ts#L32">environment.ts:32</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -1421,7 +1421,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/compact.ts#L24">compact.ts:24</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/compact.ts#L24">compact.ts:24</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -1443,7 +1443,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L1360">runtime.ts:1360</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L1360">runtime.ts:1360</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -1508,7 +1508,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/support.ts#L62">support.ts:62</a></li>
+									<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/support.ts#L62">support.ts:62</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -1530,7 +1530,7 @@
 					<div class="tsd-signature tsd-kind-icon">DLData<wbr>Type<wbr>Code<wbr>ToStr<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">object</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L247">runtime.ts:247</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L247">runtime.ts:247</a></li>
 						</ul>
 					</aside>
 					<section class="tsd-panel tsd-member tsd-kind-variable tsd-parent-kind-object-literal">
@@ -1539,7 +1539,7 @@
 						<div class="tsd-signature tsd-kind-icon">0<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span><span class="tsd-signature-symbol"> = &quot;int&quot;</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L248">runtime.ts:248</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L248">runtime.ts:248</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1549,7 +1549,7 @@
 						<div class="tsd-signature tsd-kind-icon">1<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span><span class="tsd-signature-symbol"> = &quot;uint&quot;</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L249">runtime.ts:249</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L249">runtime.ts:249</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1559,7 +1559,7 @@
 						<div class="tsd-signature tsd-kind-icon">2<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span><span class="tsd-signature-symbol"> = &quot;float&quot;</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L250">runtime.ts:250</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L250">runtime.ts:250</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1569,7 +1569,7 @@
 						<div class="tsd-signature tsd-kind-icon">3<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span><span class="tsd-signature-symbol"> = &quot;handle&quot;</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L251">runtime.ts:251</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L251">runtime.ts:251</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1580,7 +1580,7 @@
 					<div class="tsd-signature tsd-kind-icon">Device<wbr>Enum<wbr>ToStr<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">object</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L175">runtime.ts:175</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L175">runtime.ts:175</a></li>
 						</ul>
 					</aside>
 					<section class="tsd-panel tsd-member tsd-kind-variable tsd-parent-kind-object-literal">
@@ -1589,7 +1589,7 @@
 						<div class="tsd-signature tsd-kind-icon">1<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span><span class="tsd-signature-symbol"> = &quot;cpu&quot;</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L176">runtime.ts:176</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L176">runtime.ts:176</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1599,7 +1599,7 @@
 						<div class="tsd-signature tsd-kind-icon">15<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span><span class="tsd-signature-symbol"> = &quot;webgpu&quot;</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L180">runtime.ts:180</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L180">runtime.ts:180</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1609,7 +1609,7 @@
 						<div class="tsd-signature tsd-kind-icon">2<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span><span class="tsd-signature-symbol"> = &quot;gpu&quot;</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L177">runtime.ts:177</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L177">runtime.ts:177</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1619,7 +1619,7 @@
 						<div class="tsd-signature tsd-kind-icon">4<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span><span class="tsd-signature-symbol"> = &quot;opencl&quot;</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L178">runtime.ts:178</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L178">runtime.ts:178</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1629,7 +1629,7 @@
 						<div class="tsd-signature tsd-kind-icon">8<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span><span class="tsd-signature-symbol"> = &quot;metal&quot;</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L179">runtime.ts:179</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L179">runtime.ts:179</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1640,7 +1640,7 @@
 					<div class="tsd-signature tsd-kind-icon">Device<wbr>Str<wbr>ToEnum<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">object</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L183">runtime.ts:183</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L183">runtime.ts:183</a></li>
 						</ul>
 					</aside>
 					<section class="tsd-panel tsd-member tsd-kind-variable tsd-parent-kind-object-literal">
@@ -1649,7 +1649,7 @@
 						<div class="tsd-signature tsd-kind-icon">cl<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 4</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L187">runtime.ts:187</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L187">runtime.ts:187</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1659,7 +1659,7 @@
 						<div class="tsd-signature tsd-kind-icon">cpu<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 1</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L184">runtime.ts:184</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L184">runtime.ts:184</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1669,7 +1669,7 @@
 						<div class="tsd-signature tsd-kind-icon">cuda<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 2</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L186">runtime.ts:186</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L186">runtime.ts:186</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1679,7 +1679,7 @@
 						<div class="tsd-signature tsd-kind-icon">gpu<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 2</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L185">runtime.ts:185</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L185">runtime.ts:185</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1689,7 +1689,7 @@
 						<div class="tsd-signature tsd-kind-icon">metal<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 8</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L190">runtime.ts:190</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L190">runtime.ts:190</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1699,7 +1699,7 @@
 						<div class="tsd-signature tsd-kind-icon">opencl<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 4</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L188">runtime.ts:188</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L188">runtime.ts:188</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1709,7 +1709,7 @@
 						<div class="tsd-signature tsd-kind-icon">vulkan<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 7</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L189">runtime.ts:189</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L189">runtime.ts:189</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1719,7 +1719,7 @@
 						<div class="tsd-signature tsd-kind-icon">webgpu<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 15</span></div>
 						<aside class="tsd-sources">
 							<ul>
-								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/runtime.ts#L191">runtime.ts:191</a></li>
+								<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/runtime.ts#L191">runtime.ts:191</a></li>
 							</ul>
 						</aside>
 					</section>
@@ -1950,5 +1950,6 @@
 </div>
 <div class="overlay"></div>
 <script src="assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/interfaces/disposable.html b/docs/api/typedoc/interfaces/disposable.html
index 7938c8b..765ecba 100644
--- a/docs/api/typedoc/interfaces/disposable.html
+++ b/docs/api/typedoc/interfaces/disposable.html
@@ -113,7 +113,7 @@
 					<div class="tsd-signature tsd-kind-icon">dispose<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">void</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/types.ts#L52">types.ts:52</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/types.ts#L52">types.ts:52</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -188,5 +188,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/interfaces/functioninfo.html b/docs/api/typedoc/interfaces/functioninfo.html
index 27fe988..20546ac 100644
--- a/docs/api/typedoc/interfaces/functioninfo.html
+++ b/docs/api/typedoc/interfaces/functioninfo.html
@@ -95,7 +95,7 @@
 					<div class="tsd-signature tsd-kind-icon">arg_<wbr>types<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">&gt;</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L41">webgpu.ts:41</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L41">webgpu.ts:41</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -105,7 +105,7 @@
 					<div class="tsd-signature tsd-kind-icon">name<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">string</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L40">webgpu.ts:40</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L40">webgpu.ts:40</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -115,7 +115,7 @@
 					<div class="tsd-signature tsd-kind-icon">thread_<wbr>axis_<wbr>tags<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">&gt;</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/webgpu.ts#L42">webgpu.ts:42</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/webgpu.ts#L42">webgpu.ts:42</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -174,5 +174,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/api/typedoc/interfaces/libraryprovider.html b/docs/api/typedoc/interfaces/libraryprovider.html
index 4748e7c..3da4d9d 100644
--- a/docs/api/typedoc/interfaces/libraryprovider.html
+++ b/docs/api/typedoc/interfaces/libraryprovider.html
@@ -112,7 +112,7 @@
 					<div class="tsd-signature tsd-kind-icon">imports<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Record</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">, </span><span class="tsd-signature-type">any</span><span class="tsd-signature-symbol">&gt;</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/types.ts#L34">types.ts:34</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/types.ts#L34">types.ts:34</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -127,7 +127,7 @@
 					<div class="tsd-signature tsd-kind-icon">start<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>inst<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">Instance</span><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">void</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/6cae5e0/web/src/types.ts#L39">types.ts:39</a></li>
+							<li>Defined in <a href="https://github.com/apache/incubator-tvm/blob/728b829/web/src/types.ts#L39">types.ts:39</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -214,5 +214,6 @@
 </div>
 <div class="overlay"></div>
 <script src="../assets/js/main.js"></script>
+<script>if (location.protocol == 'file:') document.write('<script src="../assets/js/search.js"><' + '/script>');</script>
 </body>
 </html>
\ No newline at end of file
diff --git a/docs/contribute/code_guide.html b/docs/contribute/code_guide.html
index 9c52d87..f100656 100644
--- a/docs/contribute/code_guide.html
+++ b/docs/contribute/code_guide.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Code Guide and Tips &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Code Guide and Tips &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/contribute/code_review.html b/docs/contribute/code_review.html
index dbe8e69..70200ae 100644
--- a/docs/contribute/code_review.html
+++ b/docs/contribute/code_review.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Perform Code Reviews &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Perform Code Reviews &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/contribute/committer_guide.html b/docs/contribute/committer_guide.html
index 61166a7..6868027 100644
--- a/docs/contribute/committer_guide.html
+++ b/docs/contribute/committer_guide.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Committer Guide &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Committer Guide &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/contribute/community.html b/docs/contribute/community.html
index 4622233..947a977 100644
--- a/docs/contribute/community.html
+++ b/docs/contribute/community.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>TVM Community Guideline &mdash; tvm 0.7.dev1 documentation</title>
+  <title>TVM Community Guideline &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/contribute/document.html b/docs/contribute/document.html
index d7d343e..68bed64 100644
--- a/docs/contribute/document.html
+++ b/docs/contribute/document.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Write Document and Tutorials &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Write Document and Tutorials &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/contribute/error_handling.html b/docs/contribute/error_handling.html
index dd2dc29..1494d44 100644
--- a/docs/contribute/error_handling.html
+++ b/docs/contribute/error_handling.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Error Handling Guide &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Error Handling Guide &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/contribute/git_howto.html b/docs/contribute/git_howto.html
index c8e755d..3a5d8a8 100644
--- a/docs/contribute/git_howto.html
+++ b/docs/contribute/git_howto.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Git Usage Tips &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Git Usage Tips &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/contribute/index.html b/docs/contribute/index.html
index 840d410..532bb19 100644
--- a/docs/contribute/index.html
+++ b/docs/contribute/index.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Contribute to TVM &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Contribute to TVM &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/contribute/pull_request.html b/docs/contribute/pull_request.html
index 7eb82c6..fc435f6 100644
--- a/docs/contribute/pull_request.html
+++ b/docs/contribute/pull_request.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Submit a Pull Request &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Submit a Pull Request &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/contribute/release_process.html b/docs/contribute/release_process.html
index 5b4f2fa..faf253e 100644
--- a/docs/contribute/release_process.html
+++ b/docs/contribute/release_process.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Apache TVM (incubating) Release Process &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Apache TVM (incubating) Release Process &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -252,7 +252,17 @@
 <p>You can skip this section if you have already uploaded your key.</p>
 <p>After generating the gpg key, you need to upload your key to a public key server. Please refer to <a class="reference external" href="https://www.apache.org/dev/openpgp.html#generate-key">https://www.apache.org/dev/openpgp.html#generate-key</a> for details.</p>
 <p>If you want to do the release on another machine, you can transfer your gpg key to that machine via the <code class="code docutils literal notranslate"><span class="pre">gpg</span> <span class="pre">--export</span></code> and <code class="code docutils literal notranslate"><span class="pre">gpg</span> <span class="pre">--import</span></code> commands.</p>
-<p>The last step is to update the KEYS file with your code signing key <a class="reference external" href="https://www.apache.org/dev/openpgp.html#export-public-key">https://www.apache.org/dev/openpgp.html#export-public-key</a>. Check in the changes to the master branch.</p>
+<p>The last step is to update the KEYS file with your code signing key <a class="reference external" href="https://www.apache.org/dev/openpgp.html#export-public-key">https://www.apache.org/dev/openpgp.html#export-public-key</a>. Check in the changes to the TVM master branch, as well as ASF SVN,</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span class="c1"># the --depth=files will avoid checkout existing folders</span>
+svn co --depth<span class="o">=</span>files <span class="s2">&quot;https://dist.apache.org/repos/dist/dev/incubator/tvm&quot;</span> svn-tvm
+<span class="nb">cd</span> svn-tvm
+<span class="c1"># edit KEYS file</span>
+svn ci --username <span class="nv">$A</span>SF_USERNAME --password <span class="s2">&quot;</span><span class="nv">$A</span><span class="s2">SF_PASSWORD&quot;</span> -m <span class="s2">&quot;Update KEYS&quot;</span>
+<span class="c1"># update downloads.apache.org</span>
+svn rm --username <span class="nv">$A</span>SF_USERNAME --password <span class="s2">&quot;</span><span class="nv">$A</span><span class="s2">SF_PASSWORD&quot;</span> https://dist.apache.org/repos/dist/release/incubator/tvm/KEYS -m <span class="s2">&quot;Update KEYS&quot;</span>
+svn cp --username <span class="nv">$A</span>SF_USERNAME --password <span class="s2">&quot;</span><span class="nv">$A</span><span class="s2">SF_PASSWORD&quot;</span> https://dist.apache.org/repos/dist/dev/incubator/tvm/KEYS https://dist.apache.org/repos/dist/release/incubator/tvm/ -m <span class="s2">&quot;Update KEYS&quot;</span>
+</pre></div>
+</div>
 </div>
 <div class="section" id="cut-a-release-candidate">
 <h2>Cut a Release Candidate<a class="headerlink" href="#cut-a-release-candidate" title="Permalink to this headline">¶</a></h2>
diff --git a/docs/deploy/android.html b/docs/deploy/android.html
index b6f0f71..7c69beb 100644
--- a/docs/deploy/android.html
+++ b/docs/deploy/android.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Deploy to Android &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Deploy to Android &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/deploy/arm_compute_lib.html b/docs/deploy/arm_compute_lib.html
index a8c833d..86299b2 100644
--- a/docs/deploy/arm_compute_lib.html
+++ b/docs/deploy/arm_compute_lib.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Relay Arm ® Compute Library Integration &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Relay Arm ® Compute Library Integration &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/deploy/cpp_deploy.html b/docs/deploy/cpp_deploy.html
index a71e541..06b8397 100644
--- a/docs/deploy/cpp_deploy.html
+++ b/docs/deploy/cpp_deploy.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Deploy TVM Module using C++ API &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Deploy TVM Module using C++ API &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/deploy/hls.html b/docs/deploy/hls.html
index d5c2150..8a7dc1e 100644
--- a/docs/deploy/hls.html
+++ b/docs/deploy/hls.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>HLS Backend Example &mdash; tvm 0.7.dev1 documentation</title>
+  <title>HLS Backend Example &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/deploy/index.html b/docs/deploy/index.html
index 2861169..262b391 100644
--- a/docs/deploy/index.html
+++ b/docs/deploy/index.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Deploy and Integration &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Deploy and Integration &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/deploy/integrate.html b/docs/deploy/integrate.html
index 11bf870..99b1f3d 100644
--- a/docs/deploy/integrate.html
+++ b/docs/deploy/integrate.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Integrate TVM into Your Project &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Integrate TVM into Your Project &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/benchmark.html b/docs/dev/benchmark.html
index 54da1c5..610d870 100644
--- a/docs/dev/benchmark.html
+++ b/docs/dev/benchmark.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Benchmark Performance Log Format &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Benchmark Performance Log Format &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/codebase_walkthrough.html b/docs/dev/codebase_walkthrough.html
index a5bb111..dbe392d 100644
--- a/docs/dev/codebase_walkthrough.html
+++ b/docs/dev/codebase_walkthrough.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>TVM Codebase Walkthrough by Example &mdash; tvm 0.7.dev1 documentation</title>
+  <title>TVM Codebase Walkthrough by Example &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/convert_layout.html b/docs/dev/convert_layout.html
index 2f18835..e5e7f10 100644
--- a/docs/dev/convert_layout.html
+++ b/docs/dev/convert_layout.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Convert Layout Pass &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Convert Layout Pass &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/debugger.html b/docs/dev/debugger.html
index 41a4276..75e7020 100644
--- a/docs/dev/debugger.html
+++ b/docs/dev/debugger.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Debugger &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Debugger &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/frontend/tensorflow.html b/docs/dev/frontend/tensorflow.html
index 4bc6cc8..668ad49 100644
--- a/docs/dev/frontend/tensorflow.html
+++ b/docs/dev/frontend/tensorflow.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>TensorFlow Frontend &mdash; tvm 0.7.dev1 documentation</title>
+  <title>TensorFlow Frontend &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/how_to.html b/docs/dev/how_to.html
index 41ef36a..2b4ba9f 100644
--- a/docs/dev/how_to.html
+++ b/docs/dev/how_to.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Developer How-To Guide &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Developer How-To Guide &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/hybrid_script.html b/docs/dev/hybrid_script.html
index 8c06c84..fb2d7d1 100644
--- a/docs/dev/hybrid_script.html
+++ b/docs/dev/hybrid_script.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Hybrid Frontend Developer Guide &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Hybrid Frontend Developer Guide &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/index.html b/docs/dev/index.html
index 28457ba..30ab00f 100644
--- a/docs/dev/index.html
+++ b/docs/dev/index.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Design and Architecture &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Design and Architecture &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/inferbound.html b/docs/dev/inferbound.html
index f8357b0..33601cd 100644
--- a/docs/dev/inferbound.html
+++ b/docs/dev/inferbound.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>InferBound Pass &mdash; tvm 0.7.dev1 documentation</title>
+  <title>InferBound Pass &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/introduction_to_module_serialization.html b/docs/dev/introduction_to_module_serialization.html
index a838b93..e6611bc 100644
--- a/docs/dev/introduction_to_module_serialization.html
+++ b/docs/dev/introduction_to_module_serialization.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Introduction to Module Serialization &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Introduction to Module Serialization &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/pass_infra.html b/docs/dev/pass_infra.html
index f775a36..84d8007 100644
--- a/docs/dev/pass_infra.html
+++ b/docs/dev/pass_infra.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Pass Infrastructure &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Pass Infrastructure &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/relay_add_op.html b/docs/dev/relay_add_op.html
index cfb5eab..0c37b18 100644
--- a/docs/dev/relay_add_op.html
+++ b/docs/dev/relay_add_op.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Adding an Operator to Relay &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Adding an Operator to Relay &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/relay_add_pass.html b/docs/dev/relay_add_pass.html
index 2c4be71..3673dba 100644
--- a/docs/dev/relay_add_pass.html
+++ b/docs/dev/relay_add_pass.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Adding a Compiler Pass to Relay &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Adding a Compiler Pass to Relay &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/relay_bring_your_own_codegen.html b/docs/dev/relay_bring_your_own_codegen.html
index 7a1de27..040d26e 100644
--- a/docs/dev/relay_bring_your_own_codegen.html
+++ b/docs/dev/relay_bring_your_own_codegen.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Bring Your Own Codegen To TVM &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Bring Your Own Codegen To TVM &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/relay_intro.html b/docs/dev/relay_intro.html
index af1a365..cb92df5 100644
--- a/docs/dev/relay_intro.html
+++ b/docs/dev/relay_intro.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Introduction to Relay IR &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Introduction to Relay IR &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/relay_op_strategy.html b/docs/dev/relay_op_strategy.html
index d40262b..f5fe045 100644
--- a/docs/dev/relay_op_strategy.html
+++ b/docs/dev/relay_op_strategy.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Relay Operator Strategy &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Relay Operator Strategy &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/runtime.html b/docs/dev/runtime.html
index ba65e35..92861b4 100644
--- a/docs/dev/runtime.html
+++ b/docs/dev/runtime.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>TVM Runtime System &mdash; tvm 0.7.dev1 documentation</title>
+  <title>TVM Runtime System &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/security.html b/docs/dev/security.html
index 20dc466..b2573d4 100644
--- a/docs/dev/security.html
+++ b/docs/dev/security.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Security Guide &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Security Guide &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/dev/virtual_machine.html b/docs/dev/virtual_machine.html
index 7c634d7..521e7e4 100644
--- a/docs/dev/virtual_machine.html
+++ b/docs/dev/virtual_machine.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Putting the VM in TVM: The Relay Virtual Machine &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Putting the VM in TVM: The Relay Virtual Machine &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/faq.html b/docs/faq.html
index cca5a8d..ae43e6d 100644
--- a/docs/faq.html
+++ b/docs/faq.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Frequently Asked Questions &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Frequently Asked Questions &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/genindex.html b/docs/genindex.html
index cb5be1c..25ee68b 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -9,7 +9,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Index &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Index &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -67,7 +67,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -527,6 +527,8 @@
       <ul>
         <li><a href="api/python/contrib.html#tvm.contrib.cublas.batch_matmul">(in module tvm.contrib.cublas)</a>
 </li>
+        <li><a href="api/python/contrib.html#tvm.contrib.rocblas.batch_matmul">(in module tvm.contrib.rocblas)</a>
+</li>
         <li><a href="api/python/relay/nn.html#tvm.relay.nn.batch_matmul">(in module tvm.relay.nn)</a>
 </li>
         <li><a href="api/python/topi.html#tvm.topi.nn.batch_matmul">(in module tvm.topi.nn)</a>
@@ -576,10 +578,10 @@
       </ul></li>
       <li><a href="api/python/relay/nn.html#tvm.relay.nn.bitserial_conv2d">bitserial_conv2d() (in module tvm.relay.nn)</a>
 </li>
-      <li><a href="api/python/topi.html#tvm.topi.nn.bitserial_conv2d_legalize">bitserial_conv2d_legalize() (in module tvm.topi.nn)</a>
-</li>
   </ul></td>
   <td style="width: 33%; vertical-align: top;"><ul>
+      <li><a href="api/python/topi.html#tvm.topi.nn.bitserial_conv2d_legalize">bitserial_conv2d_legalize() (in module tvm.topi.nn)</a>
+</li>
       <li><a href="api/python/topi.html#tvm.topi.nn.bitserial_conv2d_nchw">bitserial_conv2d_nchw() (in module tvm.topi.nn)</a>
 </li>
       <li><a href="api/python/topi.html#tvm.topi.nn.bitserial_conv2d_nhwc">bitserial_conv2d_nhwc() (in module tvm.topi.nn)</a>
diff --git a/docs/index.html b/docs/index.html
index c038f1f..50251fb 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>TVM Documentation &mdash; tvm 0.7.dev1 documentation</title>
+  <title>TVM Documentation &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -67,7 +67,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/install/docker.html b/docs/install/docker.html
index c8d1196..5308d6f 100644
--- a/docs/install/docker.html
+++ b/docs/install/docker.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Docker Images &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Docker Images &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/install/from_source.html b/docs/install/from_source.html
index 5bfbcec..2e6f0bc 100644
--- a/docs/install/from_source.html
+++ b/docs/install/from_source.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Install from Source &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Install from Source &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/install/index.html b/docs/install/index.html
index cd170a8..af025f2 100644
--- a/docs/install/index.html
+++ b/docs/install/index.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Installation &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Installation &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/install/nnpack.html b/docs/install/nnpack.html
index 7b100e7..4f08a3d 100644
--- a/docs/install/nnpack.html
+++ b/docs/install/nnpack.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>NNPACK Contrib Installation &mdash; tvm 0.7.dev1 documentation</title>
+  <title>NNPACK Contrib Installation &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/langref/hybrid_script.html b/docs/langref/hybrid_script.html
index 2ff5f96..40a1d80 100644
--- a/docs/langref/hybrid_script.html
+++ b/docs/langref/hybrid_script.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Hybrid Frontend Language Reference &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Hybrid Frontend Language Reference &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/langref/index.html b/docs/langref/index.html
index e7af55d..15b9ebe 100644
--- a/docs/langref/index.html
+++ b/docs/langref/index.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Language Reference &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Language Reference &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/langref/relay_adt.html b/docs/langref/relay_adt.html
index 0988fde..e256506 100644
--- a/docs/langref/relay_adt.html
+++ b/docs/langref/relay_adt.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Algebraic Data Types in Relay &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Algebraic Data Types in Relay &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/langref/relay_expr.html b/docs/langref/relay_expr.html
index 9088873..b9e49b1 100644
--- a/docs/langref/relay_expr.html
+++ b/docs/langref/relay_expr.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Expressions in Relay &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Expressions in Relay &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/langref/relay_op.html b/docs/langref/relay_op.html
index 1307052..dbfeeb3 100644
--- a/docs/langref/relay_op.html
+++ b/docs/langref/relay_op.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Relay Core Tensor Operators &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Relay Core Tensor Operators &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/langref/relay_pattern.html b/docs/langref/relay_pattern.html
index 90b01bb..0e429bb 100644
--- a/docs/langref/relay_pattern.html
+++ b/docs/langref/relay_pattern.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Pattern Matching in Relay &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Pattern Matching in Relay &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/langref/relay_type.html b/docs/langref/relay_type.html
index e80e640..ed0a0aa 100644
--- a/docs/langref/relay_type.html
+++ b/docs/langref/relay_type.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Relay’s Type System &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Relay’s Type System &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/objects.inv b/docs/objects.inv
index 661b60c..25ad7be 100644
Binary files a/docs/objects.inv and b/docs/objects.inv differ
diff --git a/docs/py-modindex.html b/docs/py-modindex.html
index d1a70eb..5da35e7 100644
--- a/docs/py-modindex.html
+++ b/docs/py-modindex.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Python Module Index &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Python Module Index &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -69,7 +69,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/search.html b/docs/search.html
index 6b0b873..ec3af0c 100644
--- a/docs/search.html
+++ b/docs/search.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Search &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Search &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -67,7 +67,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/searchindex.js b/docs/searchindex.js
index add553b..5bc576a 100644
--- a/docs/searchindex.js
+++ b/docs/searchindex.js
@@ -1 +1 @@
-Search.setIndex({docnames:["api/links","api/python/auto_scheduler","api/python/autotvm","api/python/contrib","api/python/driver","api/python/error","api/python/graph_runtime","api/python/index","api/python/ir","api/python/micro","api/python/ndarray","api/python/relay/analysis","api/python/relay/backend","api/python/relay/dataflow_pattern","api/python/relay/frontend","api/python/relay/image","api/python/relay/index","api/python/relay/nn","api/python/relay/testing","api/python/relay/transf [...]
\ No newline at end of file
+Search.setIndex({docnames:["api/links","api/python/auto_scheduler","api/python/autotvm","api/python/contrib","api/python/driver","api/python/error","api/python/graph_runtime","api/python/index","api/python/ir","api/python/micro","api/python/ndarray","api/python/relay/analysis","api/python/relay/backend","api/python/relay/dataflow_pattern","api/python/relay/frontend","api/python/relay/image","api/python/relay/index","api/python/relay/nn","api/python/relay/testing","api/python/relay/transf [...]
\ No newline at end of file
diff --git a/docs/tutorials/auto_scheduler/sg_execution_times.html b/docs/tutorials/auto_scheduler/sg_execution_times.html
index 2ffe55f..7ebe17b 100644
--- a/docs/tutorials/auto_scheduler/sg_execution_times.html
+++ b/docs/tutorials/auto_scheduler/sg_execution_times.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Computation times &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Computation times &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -66,7 +66,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -192,10 +192,10 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-tutorials-auto-scheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>04:42.879</strong> total execution time for <strong>tutorials_auto_scheduler</strong> files:</p>
+<p><strong>03:32.923</strong> total execution time for <strong>tutorials_auto_scheduler</strong> files:</p>
 <ul class="simple">
-<li><p><strong>02:56.708</strong>: <a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-tutorials-auto-scheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a convolution layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></li>
-<li><p><strong>01:46.171</strong>: <a class="reference internal" href="tune_matmul_x86.html#sphx-glr-tutorials-auto-scheduler-tune-matmul-x86-py"><span class="std std-ref">Auto-scheduling matrix multiplication for CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_matmul_x86.py</span></code>)</p></li>
+<li><p><strong>01:57.640</strong>: <a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-tutorials-auto-scheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a convolution layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></li>
+<li><p><strong>01:35.283</strong>: <a class="reference internal" href="tune_matmul_x86.html#sphx-glr-tutorials-auto-scheduler-tune-matmul-x86-py"><span class="std std-ref">Auto-scheduling matrix multiplication for CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_matmul_x86.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/tutorials/auto_scheduler/tune_conv2d_layer_cuda.html b/docs/tutorials/auto_scheduler/tune_conv2d_layer_cuda.html
index ac76752..b5754c0 100644
--- a/docs/tutorials/auto_scheduler/tune_conv2d_layer_cuda.html
+++ b/docs/tutorials/auto_scheduler/tune_conv2d_layer_cuda.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Auto-scheduling a convolution layer for GPU &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Auto-scheduling a convolution layer for GPU &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -321,63 +321,278 @@ cooperative fetching, unrolling and operator fusion.</p>
              kernel: Buffer(kernel_2: Pointer(float32), float32, [512, 512, 3, 3], []),
              data: Buffer(data_2: Pointer(float32), float32, [1, 512, 7, 7], [])}
   buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute} {
-  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 28;
+  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 64;
   attr [compute_3: Pointer(float32)] &quot;storage_scope&quot; = &quot;local&quot;;
-  allocate(compute_3, float32, [16]);
+  allocate(compute_3, float32, [8]);
   attr [pad_temp.shared: Pointer(float32)] &quot;storage_scope&quot; = &quot;shared&quot;;
-  allocate(pad_temp.shared, float32, [144]);
+  allocate(pad_temp.shared, float32, [1296]);
   attr [kernel.shared: Pointer(float32)] &quot;storage_scope&quot; = &quot;shared&quot;;
-  allocate(kernel.shared, float32, [6144]);
-  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
+  allocate(kernel.shared, float32, [1152]);
+  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49 {
     compute_3[0] = 0f32
-    compute_3[4] = 0f32
-    compute_3[8] = 0f32
-    compute_3[12] = 0f32
     compute_3[1] = 0f32
-    compute_3[5] = 0f32
-    compute_3[9] = 0f32
-    compute_3[13] = 0f32
     compute_3[2] = 0f32
-    compute_3[6] = 0f32
-    compute_3[10] = 0f32
-    compute_3[14] = 0f32
     compute_3[3] = 0f32
+    compute_3[4] = 0f32
+    compute_3[5] = 0f32
+    compute_3[6] = 0f32
     compute_3[7] = 0f32
-    compute_3[11] = 0f32
-    compute_3[15] = 0f32
     for (rc.outer.outer: int32, 0, 32) {
-      for (ry.outer.outer: int32, 0, 3) {
-        attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
-        pad_temp.shared[threadIdx.x_1] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(threadIdx.x_1, 9))) &amp;&amp; (floormod(threadIdx.x_1, 9) &lt; 8)), (float32*)data_2[((((((rc.outer.outer*784) + (floordiv(threadIdx.x_1, 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x_1, 9)) - 8)], 0f32, dtype=float32)
-        attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
-        pad_temp.shared[(threadIdx.x_1 + 56)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 2), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 2), 9) &lt; 8)), (float32*)data_2[((((((rc.outer.outer*784) + (floordiv((threadIdx.x_1 + 56), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-        attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
-        if @tir.likely((threadIdx.x_1 &lt; 32), dtype=bool) {
-          pad_temp.shared[(threadIdx.x_1 + 112)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 4), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 4), 9) &lt; 8)), (float32*)data_2[((((((rc.outer.outer*784) + (floordiv((threadIdx.x_1 + 112), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype [...]
+      attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49 {
+        pad_temp.shared[(threadIdx.x_1*4)] = @tir.if_then_else(((((9 &lt;= floormod((threadIdx.x_1*4), 81)) &amp;&amp; (floormod((threadIdx.x_1*4), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 9))) &amp;&amp; (floormod((threadIdx.x_1*4), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv((threadIdx.x_1*4), 81)*49)) + (floordiv(floormod((threadIdx.x_1*4), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
+        pad_temp.shared[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 1), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 1), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 1), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 1), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0 [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 2), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 2), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 2), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 2), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 2), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 2), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0 [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 3), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 3), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 3), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 3), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 3), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 3), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0 [...]
+      }
+      attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49 {
+        pad_temp.shared[((threadIdx.x_1*4) + 196)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 34), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 34), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 7), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 7), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 196), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 34), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 7), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 197)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 35), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 35), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 8), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 8), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 197), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 35), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 198)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 36), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 36), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 9))) &amp;&amp; (floormod((threadIdx.x_1*4), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 198), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 36), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype= [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 199)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 37), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 37), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 1), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 199), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 37), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - [...]
+      }
+      attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49 {
+        pad_temp.shared[((threadIdx.x_1*4) + 392)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 68), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 68), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 5), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 5), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 392), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 68), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 5), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 393)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 69), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 69), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 6), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 6), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 393), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 69), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 6), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 394)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 70), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 70), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 7), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 7), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 394), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 70), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 7), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 395)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 71), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 71), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 8), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 8), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 395), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 71), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - [...]
+      }
+      attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49 {
+        pad_temp.shared[((threadIdx.x_1*4) + 588)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 21), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 21), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 3), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 3), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 588), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 21), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 589)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 22), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 22), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 4), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 4), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 589), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 22), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 4), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 590)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 23), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 23), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 5), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 5), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 590), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 23), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 5), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 591)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 24), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 24), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 6), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 6), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 591), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 24), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 6), 9)) - [...]
+      }
+      attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49 {
+        pad_temp.shared[((threadIdx.x_1*4) + 784)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 55), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 55), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 1), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 784), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 55), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 785)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 56), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 56), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 2), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 2), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 785), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 56), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 786)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 57), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 57), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 3), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 3), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 786), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 57), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 787)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 58), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 58), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 4), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 4), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 787), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 58), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 4), 9)) - [...]
+      }
+      attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49 {
+        pad_temp.shared[((threadIdx.x_1*4) + 980)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 8), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 8), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 8), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 8), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 980), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 8), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), 9)) - 8) [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 981)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 9), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 9), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 9))) &amp;&amp; (floormod((threadIdx.x_1*4), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 981), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 9), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32, dtype=float32)
+        pad_temp.shared[((threadIdx.x_1*4) + 982)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 10), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 10), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 1), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 982), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 10), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - [...]
+        pad_temp.shared[((threadIdx.x_1*4) + 983)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 11), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 11), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 2), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 2), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 983), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 11), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - [...]
+      }
+      attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49 {
+        if @tir.likely((threadIdx.x_1 &lt; 30), dtype=bool) {
+          pad_temp.shared[((threadIdx.x_1*4) + 1176)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 42), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 42), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 6), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 6), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1176), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 42), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 6), 9 [...]
         }
-        for (ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer: int32, 0, 110) {
-          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
-          if @tir.likely((((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2) &lt; 6144), dtype=bool) {
-            kernel.shared[((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2)] = (float32*)kernel_2[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2), 48)*4608)) + (rc.outer.outer*144)) + (floordiv(floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2), 48), 3)*9)) + (ry.outer.outer*3)) + floormod(((ax0.ax1.fused.ax2.fused.ax3.fused.outer.outer*56) + threadIdx.x_2), 3))]
+        if @tir.likely(((threadIdx.x_1*4) &lt; 119), dtype=bool) {
+          if @tir.likely((threadIdx.x_1 &lt; 30), dtype=bool) {
+            pad_temp.shared[((threadIdx.x_1*4) + 1177)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 43), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 43), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 7), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 7), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1177), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 43), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 7), [...]
           }
         }
-        for (rx.outer.inner: int32, 0, 3) {
-          for (ff.outer.inner: int32, 0, 4) {
-            for (rc.inner: int32, 0, 16) {
-              compute_3[ff.outer.inner] = ((float32*)compute_3[ff.outer.inner] + ((float32*)pad_temp.shared[(((rc.inner*9) + rx.outer.inner) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((((floordiv(threadIdx.x, 7)*192) + (ff.outer.inner*48)) + (rc.inner*3)) + rx.outer.inner)]))
-              compute_3[(ff.outer.inner + 4)] = ((float32*)compute_3[(ff.outer.inner + 4)] + ((float32*)pad_temp.shared[(((rc.inner*9) + rx.outer.inner) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[(((((floordiv(threadIdx.x, 7)*192) + (ff.outer.inner*48)) + (rc.inner*3)) + rx.outer.inner) + 1536)]))
-              compute_3[(ff.outer.inner + 8)] = ((float32*)compute_3[(ff.outer.inner + 8)] + ((float32*)pad_temp.shared[(((rc.inner*9) + rx.outer.inner) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[(((((floordiv(threadIdx.x, 7)*192) + (ff.outer.inner*48)) + (rc.inner*3)) + rx.outer.inner) + 3072)]))
-              compute_3[(ff.outer.inner + 12)] = ((float32*)compute_3[(ff.outer.inner + 12)] + ((float32*)pad_temp.shared[(((rc.inner*9) + rx.outer.inner) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[(((((floordiv(threadIdx.x, 7)*192) + (ff.outer.inner*48)) + (rc.inner*3)) + rx.outer.inner) + 4608)]))
-            }
+        if @tir.likely(((threadIdx.x_1*4) &lt; 118), dtype=bool) {
+          if @tir.likely((threadIdx.x_1 &lt; 30), dtype=bool) {
+            pad_temp.shared[((threadIdx.x_1*4) + 1178)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 44), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 44), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 8), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 8), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1178), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 44), 81), 9)*7)) + floormod(((threadIdx.x_1*4) + 8), [...]
           }
         }
+        if @tir.likely(((threadIdx.x_1*4) &lt; 117), dtype=bool) {
+          if @tir.likely((threadIdx.x_1 &lt; 30), dtype=bool) {
+            pad_temp.shared[((threadIdx.x_1*4) + 1179)] = @tir.if_then_else(((((9 &lt;= floormod(((threadIdx.x_1*4) + 45), 81)) &amp;&amp; (floormod(((threadIdx.x_1*4) + 45), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 9))) &amp;&amp; (floormod((threadIdx.x_1*4), 9) &lt; 8)), (float32*)data_2[(((((rc.outer.outer*784) + (floordiv(((threadIdx.x_1*4) + 1179), 81)*49)) + (floordiv(floormod(((threadIdx.x_1*4) + 45), 81), 9)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f32,  [...]
+          }
+        }
+      }
+      attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[threadIdx.x_2] = (float32*)kernel_2[(((blockIdx.x*36864) + (rc.outer.outer*144)) + threadIdx.x_2)]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 49)] = (float32*)kernel_2[(((blockIdx.x*36864) + (rc.outer.outer*144)) + (threadIdx.x_2 + 49))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 98)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 98), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 98), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 147)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 147), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 3), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 196)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 196), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 52), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 245)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 245), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 101), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 294)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 294), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 6), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 343)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 343), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 55), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 392)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 392), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 104), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 441)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 441), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 9), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 490)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 490), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 58), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 539)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 539), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 107), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 588)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 588), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 12), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 637)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 637), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 61), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 686)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 686), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 110), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 735)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 735), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 15), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 784)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 784), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 64), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 833)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 833), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 113), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 882)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 882), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 18), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 931)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 931), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 67), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 980)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 980), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 116), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 1029)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1029), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 21), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      kernel.shared[(threadIdx.x_2 + 1078)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1078), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 70), 144))]
+      attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 49;
+      if @tir.likely((threadIdx.x_2 &lt; 25), dtype=bool) {
+        kernel.shared[(threadIdx.x_2 + 1127)] = (float32*)kernel_2[((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1127), 144)*4608)) + (rc.outer.outer*144)) + floormod((threadIdx.x_2 + 119), 144))]
+      }
+      for (rc.outer.inner: int32, 0, 8) {
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[(rc.outer.inner*18)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 144)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 288)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 432)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 145)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 289)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 433)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 2)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 146)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 290)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 434)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 3)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 147)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 291)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 435)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 4)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 148)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 292)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 436)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 5)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 149)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 293)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 437)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 6)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 150)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 294)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 438)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 7)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 151)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 295)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 439)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 8)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 152)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 296)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 440)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 9)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 153)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 297)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 441)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 10)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 154)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 298)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 442)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 11)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 155)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 299)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 443)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 12)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 156)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 300)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 444)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 13)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 157)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 301)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 445)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 14)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 158)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 302)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 446)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 15)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 159)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 303)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 447)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 16)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 160)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 304)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 448)]))
+        compute_3[0] = ((float32*)compute_3[0] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 17)]))
+        compute_3[1] = ((float32*)compute_3[1] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 161)]))
+        compute_3[2] = ((float32*)compute_3[2] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 305)]))
+        compute_3[3] = ((float32*)compute_3[3] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 449)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 576)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 720)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 864)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[(((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7))]*(float32*)kernel.shared[((rc.outer.inner*18) + 1008)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 577)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 721)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 865)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1009)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 578)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 722)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 866)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1010)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 579)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 723)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 867)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 9)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1011)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 580)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 724)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 868)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 10)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1012)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 581)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 725)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 869)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 11)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1013)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 582)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 726)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 870)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 18)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1014)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 583)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 727)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 871)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 19)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1015)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 584)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 728)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 872)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 20)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1016)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 585)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 729)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 873)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 81)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1017)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 586)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 730)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 874)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 82)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1018)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 587)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 731)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 875)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 83)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1019)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 588)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 732)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 876)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 90)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1020)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 589)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 733)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 877)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 91)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1021)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 590)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 734)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 878)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 92)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1022)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 591)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 735)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 879)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 99)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1023)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 592)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 736)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 880)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 100)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1024)]))
+        compute_3[4] = ((float32*)compute_3[4] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 593)]))
+        compute_3[5] = ((float32*)compute_3[5] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 737)]))
+        compute_3[6] = ((float32*)compute_3[6] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 881)]))
+        compute_3[7] = ((float32*)compute_3[7] + ((float32*)pad_temp.shared[((((rc.outer.inner*162) + (floordiv(threadIdx.x, 7)*9)) + floormod(threadIdx.x, 7)) + 101)]*(float32*)kernel.shared[((rc.outer.inner*18) + 1025)]))
       }
     }
-    for (i1.inner: int32, 0, 4) {
-      compute_2[(((((floordiv(blockIdx.x, 7)*6272) + (floordiv(threadIdx.x, 7)*196)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7))] = max(((float32*)compute_3[i1.inner] + (float32*)bias_2[(((floordiv(blockIdx.x, 7)*128) + (floordiv(threadIdx.x, 7)*4)) + i1.inner)]), 0f32)
-      compute_2[((((((floordiv(blockIdx.x, 7)*6272) + (floordiv(threadIdx.x, 7)*196)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 1568)] = max(((float32*)compute_3[(i1.inner + 4)] + (float32*)bias_2[((((floordiv(blockIdx.x, 7)*128) + (floordiv(threadIdx.x, 7)*4)) + i1.inner) + 32)]), 0f32)
-      compute_2[((((((floordiv(blockIdx.x, 7)*6272) + (floordiv(threadIdx.x, 7)*196)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 3136)] = max(((float32*)compute_3[(i1.inner + 8)] + (float32*)bias_2[((((floordiv(blockIdx.x, 7)*128) + (floordiv(threadIdx.x, 7)*4)) + i1.inner) + 64)]), 0f32)
-      compute_2[((((((floordiv(blockIdx.x, 7)*6272) + (floordiv(threadIdx.x, 7)*196)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + floormod(threadIdx.x, 7)) + 4704)] = max(((float32*)compute_3[(i1.inner + 12)] + (float32*)bias_2[((((floordiv(blockIdx.x, 7)*128) + (floordiv(threadIdx.x, 7)*4)) + i1.inner) + 96)]), 0f32)
+    for (i1.inner: int32, 0, 8) {
+      compute_2[(((blockIdx.x*392) + (i1.inner*49)) + threadIdx.x)] = max(((float32*)compute_3[i1.inner] + (float32*)bias_2[((blockIdx.x*8) + i1.inner)]), 0f32)
     }
   }
 }
@@ -415,7 +630,7 @@ cooperative fetching, unrolling and operator fusion.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Execution time of this operator: 1.361 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Execution time of this operator: 0.153 ms
 </pre></div>
 </div>
 </div>
@@ -442,73 +657,73 @@ print the equivalent python schedule API, and build the binary again.</p>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Equivalent python schedule:
-i0, i1, i2, i3 = tuple(pad_temp.op.axis) + tuple(pad_temp.op.reduce_axis)
-nn, ff, yy, xx, rc, ry, rx = tuple(compute.op.axis) + tuple(compute.op.reduce_axis)
-ax0, ax1, ax2, ax3 = tuple(T_add.op.axis) + tuple(T_add.op.reduce_axis)
-i0, i1, i2, i3 = tuple(compute.op.axis) + tuple(compute.op.reduce_axis)
+pad_temp_i0, pad_temp_i1, pad_temp_i2, pad_temp_i3 = tuple(pad_temp.op.axis) + tuple(pad_temp.op.reduce_axis)
+compute_nn, compute_ff, compute_yy, compute_xx, compute_rc, compute_ry, compute_rx = tuple(compute.op.axis) + tuple(compute.op.reduce_axis)
+T_add_ax0, T_add_ax1, T_add_ax2, T_add_ax3 = tuple(T_add.op.axis) + tuple(T_add.op.reduce_axis)
+compute_i0, compute_i1, compute_i2, compute_i3 = tuple(compute.op.axis) + tuple(compute.op.reduce_axis)
 s[T_add].compute_inline()
-nn_o_i, nn_i = s[compute].split(nn, factor=1)
-nn_o_o_i, nn_o_i = s[compute].split(nn_o_i, factor=1)
-nn_o_o_o_i, nn_o_o_i = s[compute].split(nn_o_o_i, factor=1)
-nn_o_o_o_o, nn_o_o_o_i = s[compute].split(nn_o_o_o_i, factor=1)
-ff_o_i, ff_i = s[compute].split(ff, factor=1)
-ff_o_o_i, ff_o_i = s[compute].split(ff_o_i, factor=1)
-ff_o_o_o_i, ff_o_o_i = s[compute].split(ff_o_o_i, factor=16)
-ff_o_o_o_o, ff_o_o_o_i = s[compute].split(ff_o_o_o_i, factor=1)
-yy_o_i, yy_i = s[compute].split(yy, factor=1)
-yy_o_o_i, yy_o_i = s[compute].split(yy_o_i, factor=7)
-yy_o_o_o_i, yy_o_o_i = s[compute].split(yy_o_o_i, factor=1)
-yy_o_o_o_o, yy_o_o_o_i = s[compute].split(yy_o_o_o_i, factor=1)
-xx_o_i, xx_i = s[compute].split(xx, factor=1)
-xx_o_o_i, xx_o_i = s[compute].split(xx_o_i, factor=1)
-xx_o_o_o_i, xx_o_o_i = s[compute].split(xx_o_o_i, factor=7)
-xx_o_o_o_o, xx_o_o_o_i = s[compute].split(xx_o_o_o_i, factor=1)
-rc_o_i, rc_i = s[compute].split(rc, factor=1)
-rc_o_o, rc_o_i = s[compute].split(rc_o_i, factor=16)
-ry_o_i, ry_i = s[compute].split(ry, factor=3)
-ry_o_o, ry_o_i = s[compute].split(ry_o_i, factor=1)
-rx_o_i, rx_i = s[compute].split(rx, factor=1)
-rx_o_o, rx_o_i = s[compute].split(rx_o_i, factor=3)
-s[compute].reorder(nn_o_o_o_o, ff_o_o_o_o, yy_o_o_o_o, xx_o_o_o_o, nn_o_o_o_i, ff_o_o_o_i, yy_o_o_o_i, xx_o_o_o_i, nn_o_o_i, ff_o_o_i, yy_o_o_i, xx_o_o_i, rc_o_o, ry_o_o, rx_o_o, rc_o_i, ry_o_i, rx_o_i, nn_o_i, ff_o_i, yy_o_i, xx_o_i, rc_i, ry_i, rx_i, nn_i, ff_i, yy_i, xx_i)
-i0_o_i, i0_i = s[compute].split(i0, factor=1)
-i0_o_o_i, i0_o_i = s[compute].split(i0_o_i, factor=1)
-i0_o_o_o, i0_o_o_i = s[compute].split(i0_o_o_i, factor=1)
-i1_o_i, i1_i = s[compute].split(i1, factor=1)
-i1_o_o_i, i1_o_i = s[compute].split(i1_o_i, factor=16)
-i1_o_o_o, i1_o_o_i = s[compute].split(i1_o_o_i, factor=1)
-i2_o_i, i2_i = s[compute].split(i2, factor=7)
-i2_o_o_i, i2_o_i = s[compute].split(i2_o_i, factor=1)
-i2_o_o_o, i2_o_o_i = s[compute].split(i2_o_o_i, factor=1)
-i3_o_i, i3_i = s[compute].split(i3, factor=1)
-i3_o_o_i, i3_o_i = s[compute].split(i3_o_i, factor=7)
-i3_o_o_o, i3_o_o_i = s[compute].split(i3_o_o_i, factor=1)
-s[compute].reorder(i0_o_o_o, i1_o_o_o, i2_o_o_o, i3_o_o_o, i0_o_o_i, i1_o_o_i, i2_o_o_i, i3_o_o_i, i0_o_i, i1_o_i, i2_o_i, i3_o_i, i0_i, i1_i, i2_i, i3_i)
-s[compute].compute_at(s[compute], i3_o_i)
+compute_nn_o_i, compute_nn_i = s[compute].split(compute_nn, factor=1)
+compute_nn_o_o_i, compute_nn_o_i = s[compute].split(compute_nn_o_i, factor=1)
+compute_nn_o_o_o_i, compute_nn_o_o_i = s[compute].split(compute_nn_o_o_i, factor=1)
+compute_nn_o_o_o_o, compute_nn_o_o_o_i = s[compute].split(compute_nn_o_o_o_i, factor=1)
+compute_ff_o_i, compute_ff_i = s[compute].split(compute_ff, factor=1)
+compute_ff_o_o_i, compute_ff_o_i = s[compute].split(compute_ff_o_i, factor=1)
+compute_ff_o_o_o_i, compute_ff_o_o_i = s[compute].split(compute_ff_o_o_i, factor=16)
+compute_ff_o_o_o_o, compute_ff_o_o_o_i = s[compute].split(compute_ff_o_o_o_i, factor=1)
+compute_yy_o_i, compute_yy_i = s[compute].split(compute_yy, factor=7)
+compute_yy_o_o_i, compute_yy_o_i = s[compute].split(compute_yy_o_i, factor=1)
+compute_yy_o_o_o_i, compute_yy_o_o_i = s[compute].split(compute_yy_o_o_i, factor=1)
+compute_yy_o_o_o_o, compute_yy_o_o_o_i = s[compute].split(compute_yy_o_o_o_i, factor=1)
+compute_xx_o_i, compute_xx_i = s[compute].split(compute_xx, factor=1)
+compute_xx_o_o_i, compute_xx_o_i = s[compute].split(compute_xx_o_i, factor=1)
+compute_xx_o_o_o_i, compute_xx_o_o_i = s[compute].split(compute_xx_o_o_i, factor=7)
+compute_xx_o_o_o_o, compute_xx_o_o_o_i = s[compute].split(compute_xx_o_o_o_i, factor=1)
+compute_rc_o_i, compute_rc_i = s[compute].split(compute_rc, factor=2)
+compute_rc_o_o, compute_rc_o_i = s[compute].split(compute_rc_o_i, factor=8)
+compute_ry_o_i, compute_ry_i = s[compute].split(compute_ry, factor=1)
+compute_ry_o_o, compute_ry_o_i = s[compute].split(compute_ry_o_i, factor=3)
+compute_rx_o_i, compute_rx_i = s[compute].split(compute_rx, factor=3)
+compute_rx_o_o, compute_rx_o_i = s[compute].split(compute_rx_o_i, factor=1)
+s[compute].reorder(compute_nn_o_o_o_o, compute_ff_o_o_o_o, compute_yy_o_o_o_o, compute_xx_o_o_o_o, compute_nn_o_o_o_i, compute_ff_o_o_o_i, compute_yy_o_o_o_i, compute_xx_o_o_o_i, compute_nn_o_o_i, compute_ff_o_o_i, compute_yy_o_o_i, compute_xx_o_o_i, compute_rc_o_o, compute_ry_o_o, compute_rx_o_o, compute_rc_o_i, compute_ry_o_i, compute_rx_o_i, compute_nn_o_i, compute_ff_o_i, compute_yy_o_i, compute_xx_o_i, compute_rc_i, compute_ry_i, compute_rx_i, compute_nn_i, compute_ff_i, compute_yy_ [...]
+compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
+compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
+compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
+compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=16)
+compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
+compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=7)
+compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
+compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
+compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
+s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
+s[compute].compute_at(s[compute], compute_i3_o_i)
 kernel_shared = s.cache_read(kernel, &quot;shared&quot;, [compute])
-ax0, ax1, ax2, ax3 = tuple(kernel_shared.op.axis)
-s[kernel_shared].compute_at(s[compute], rx_o_o)
+kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3 = tuple(kernel_shared.op.axis)
+s[kernel_shared].compute_at(s[compute], compute_rx_o_o)
 pad_temp_shared = s.cache_read(pad_temp, &quot;shared&quot;, [compute])
-ax0, ax1, ax2, ax3 = tuple(pad_temp_shared.op.axis)
-s[pad_temp_shared].compute_at(s[compute], rx_o_o)
+pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3 = tuple(pad_temp_shared.op.axis)
+s[pad_temp_shared].compute_at(s[compute], compute_rx_o_o)
 s[pad_temp].compute_inline()
-i0_o_o_o_i1_o_o_o_fused_i2_o_o_o_fused_i3_o_o_o_fused = s[compute].fuse(i0_o_o_o, i1_o_o_o, i2_o_o_o, i3_o_o_o)
-s[compute].bind(i0_o_o_o_i1_o_o_o_fused_i2_o_o_o_fused_i3_o_o_o_fused, tvm.thread_axis(&quot;blockIdx.x&quot;))
-i0_o_o_i_i1_o_o_i_fused_i2_o_o_i_fused_i3_o_o_i_fused = s[compute].fuse(i0_o_o_i, i1_o_o_i, i2_o_o_i, i3_o_o_i)
-s[compute].bind(i0_o_o_i_i1_o_o_i_fused_i2_o_o_i_fused_i3_o_o_i_fused, tvm.thread_axis(&quot;vthread&quot;))
-i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused = s[compute].fuse(i0_o_i, i1_o_i, i2_o_i, i3_o_i)
-s[compute].bind(i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, tvm.thread_axis(&quot;threadIdx.x&quot;))
-ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(ax0, ax1, ax2, ax3)
-ax0_ax1_fused_ax2_fused_ax3_fused_o, ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(ax0_ax1_fused_ax2_fused_ax3_fused, factor=3)
-s[kernel_shared].vectorize(ax0_ax1_fused_ax2_fused_ax3_fused_i)
-ax0_ax1_fused_ax2_fused_ax3_fused_o_o, ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
-s[kernel_shared].bind(ax0_ax1_fused_ax2_fused_ax3_fused_o_i, tvm.thread_axis(&quot;threadIdx.x&quot;))
-ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(ax0, ax1, ax2, ax3)
-ax0_ax1_fused_ax2_fused_ax3_fused_o, ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(ax0_ax1_fused_ax2_fused_ax3_fused, factor=9)
-s[pad_temp_shared].vectorize(ax0_ax1_fused_ax2_fused_ax3_fused_i)
-ax0_ax1_fused_ax2_fused_ax3_fused_o_o, ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
-s[pad_temp_shared].bind(ax0_ax1_fused_ax2_fused_ax3_fused_o_i, tvm.thread_axis(&quot;threadIdx.x&quot;))
-s[compute].pragma(nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 1024)
-s[compute].pragma(nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
+compute_i0_o_o_o_i1_o_o_o_fused_i2_o_o_o_fused_i3_o_o_o_fused = s[compute].fuse(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o)
+s[compute].bind(compute_i0_o_o_o_i1_o_o_o_fused_i2_o_o_o_fused_i3_o_o_o_fused, te.thread_axis(&quot;blockIdx.x&quot;))
+compute_i0_o_o_i_i1_o_o_i_fused_i2_o_o_i_fused_i3_o_o_i_fused = s[compute].fuse(compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i)
+s[compute].bind(compute_i0_o_o_i_i1_o_o_i_fused_i2_o_o_i_fused_i3_o_o_i_fused, te.thread_axis(&quot;vthread&quot;))
+compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused = s[compute].fuse(compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i)
+s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread_axis(&quot;threadIdx.x&quot;))
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=6)
+s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
+s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=3)
+s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
+s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
+s[compute].pragma(compute_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 1024)
+s[compute].pragma(compute_nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
 </pre></div>
 </div>
 <p>A more complicated example is to resume the search.
@@ -536,7 +751,7 @@ In the example below we resume the status and do more 5 trials.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  56.708 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  57.640 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorials-auto-scheduler-tune-conv2d-layer-cuda-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/678f3c372a599a18d909aed0fefb30be/tune_conv2d_layer_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_conv2d_layer_cuda.py</span></code></a></p>
diff --git a/docs/tutorials/auto_scheduler/tune_matmul_x86.html b/docs/tutorials/auto_scheduler/tune_matmul_x86.html
index 5474d59..b74463b 100644
--- a/docs/tutorials/auto_scheduler/tune_matmul_x86.html
+++ b/docs/tutorials/auto_scheduler/tune_matmul_x86.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Auto-scheduling matrix multiplication for CPU &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Auto-scheduling matrix multiplication for CPU &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -293,7 +293,7 @@ After some measurement trials, it will return the best schedule it found.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>*T*T*T*T*T*T*T*T*T
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>*T*T*T*T*T*T*T*T
 </pre></div>
 </div>
 <p>We can lower the schedule to see the IR after auto-scheduling.
@@ -305,83 +305,29 @@ parallelization, vectorization, unrolling and operator fusion.</p>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>primfn(A_1: handle, B_1: handle, C_1: handle, out_1: handle) -&gt; ()
   attr = {&quot;global_symbol&quot;: &quot;main&quot;, &quot;tir.noalias&quot;: True}
-  buffers = {out: Buffer(out_2: Pointer(float32), float32, [128, 128], []),
-             C: Buffer(C_2: Pointer(float32), float32, [128, 128], []),
+  buffers = {C: Buffer(C_2: Pointer(float32), float32, [128, 128], []),
+             out: Buffer(out_2: Pointer(float32), float32, [128, 128], []),
              B: Buffer(B_2: Pointer(float32), float32, [128, 128], []),
              A: Buffer(A_2: Pointer(float32), float32, [128, 128], [])}
   buffer_map = {A_1: A, B_1: B, C_1: C, out_1: out} {
   attr [matmul: Pointer(float32)] &quot;storage_scope&quot; = &quot;global&quot;;
   allocate(matmul, float32, [16384]) {
-    for (i.outer.outer.inner: int32, 0, 2) {
-      for (j.outer.outer.inner: int32, 0, 16) {
-        for (i.outer.inner.init: int32, 0, 16) {
-          matmul[(((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8))] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 128)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 256)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 384)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 1)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 129)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 257)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 385)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 2)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 130)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 258)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 386)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 3)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 131)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 259)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 387)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 4)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 132)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 260)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 388)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 5)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 133)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 261)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 389)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 6)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 134)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 262)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 390)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 7)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 135)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 263)] = 0f32
-          matmul[((((i.outer.outer.inner*8192) + (i.outer.inner.init*512)) + (j.outer.outer.inner*8)) + 391)] = 0f32
-        }
-        for (k.outer: int32, 0, 128) {
-          for (i.outer.inner: int32, 0, 16) {
-            matmul[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8))] = ((float32*)matmul[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8))] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[((k.outer*128) + (j.outer.outer.inner*8))]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 128)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 128)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[((k.outer*128) + (j.outer.outer.inner*8))]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 256)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 256)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[((k.outer*128) + (j.outer.outer.inner*8))]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 384)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 384)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[((k.outer*128) + (j.outer.outer.inner*8))]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 1)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 1)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 1)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 129)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 129)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 1)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 257)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 257)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 1)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 385)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 385)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 1)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 2)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 2)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 2)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 130)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 130)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 2)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 258)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 258)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 2)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 386)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 386)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 2)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 3)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 3)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 3)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 131)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 131)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 3)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 259)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 259)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 3)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 387)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 387)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 3)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 4)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 4)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 4)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 132)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 132)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 4)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 260)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 260)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 4)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 388)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 388)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 4)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 5)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 5)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 5)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 133)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 133)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 5)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 261)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 261)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 5)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 389)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 389)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 5)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 6)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 6)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 6)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 134)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 134)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 6)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 262)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 262)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 6)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 390)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 390)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 6)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 7)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 7)] + ((float32*)A_2[(((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 7)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 135)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 135)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 128)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 7)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 263)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 263)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 256)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 7)]))
-            matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 391)] = ((float32*)matmul[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + (j.outer.outer.inner*8)) + 391)] + ((float32*)A_2[((((i.outer.outer.inner*8192) + (i.outer.inner*512)) + k.outer) + 384)]*(float32*)B_2[(((k.outer*128) + (j.outer.outer.inner*8)) + 7)]))
+    for (i.outer.outer.inner: int32, 0, 64) {
+      for (j.outer.outer.inner: int32, 0, 4) {
+        matmul[ramp(((i.outer.outer.inner*256) + (j.outer.outer.inner*32)), 1, 8)] = broadcast(0f32, 8)
+        matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 128), 1, 8)] = broadcast(0f32, 8)
+        matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 8), 1, 8)] = broadcast(0f32, 8)
+        matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 136), 1, 8)] = broadcast(0f32, 8)
+        matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 16), 1, 8)] = broadcast(0f32, 8)
+        matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 144), 1, 8)] = broadcast(0f32, 8)
+        matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 24), 1, 8)] = broadcast(0f32, 8)
+        matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + 152), 1, 8)] = broadcast(0f32, 8)
+        for (k.outer: int32, 0, 8) {
+          for (j.outer.inner: int32, 0, 4) {
+            for (k.inner: int32, 0, 16) {
+              matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + (j.outer.inner*8)), 1, 8)] = ((float32x8*)matmul[ramp((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + (j.outer.inner*8)), 1, 8)] + (broadcast((float32*)A_2[(((i.outer.outer.inner*256) + (k.outer*16)) + k.inner)], 8)*(float32x8*)B_2[ramp(((((k.outer*2048) + (k.inner*128)) + (j.outer.outer.inner*32)) + (j.outer.inner*8)), 1, 8)]))
+              matmul[ramp(((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + (j.outer.inner*8)) + 128), 1, 8)] = ((float32x8*)matmul[ramp(((((i.outer.outer.inner*256) + (j.outer.outer.inner*32)) + (j.outer.inner*8)) + 128), 1, 8)] + (broadcast((float32*)A_2[((((i.outer.outer.inner*256) + (k.outer*16)) + k.inner) + 128)], 8)*(float32x8*)B_2[ramp(((((k.outer*2048) + (k.inner*128)) + (j.outer.outer.inner*32)) + (j.outer.inner*8)), 1, 8)]))
+            }
           }
         }
       }
@@ -424,7 +370,7 @@ parallelization, vectorization, unrolling and operator fusion.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Execution time of this operator: 0.345 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Execution time of this operator: 0.130 ms
 </pre></div>
 </div>
 </div>
@@ -451,25 +397,25 @@ print the equivalent python schedule API, and build the binary again.</p>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Equivalent python schedule:
-i, j, k = tuple(matmul.op.axis) + tuple(matmul.op.reduce_axis)
-i, j = tuple(out.op.axis) + tuple(out.op.reduce_axis)
-i_o_i, i_i = s[matmul].split(i, factor=1)
-i_o_o_i, i_o_i = s[matmul].split(i_o_i, factor=128)
-i_o_o_o, i_o_o_i = s[matmul].split(i_o_o_i, factor=1)
-j_o_i, j_i = s[matmul].split(j, factor=4)
-j_o_o_i, j_o_i = s[matmul].split(j_o_i, factor=8)
-j_o_o_o, j_o_o_i = s[matmul].split(j_o_o_i, factor=4)
-k_o, k_i = s[matmul].split(k, factor=4)
-s[matmul].reorder(i_o_o_o, j_o_o_o, i_o_o_i, j_o_o_i, k_o, i_o_i, j_o_i, k_i, i_i, j_i)
-i_o, i_i = s[out].split(i, factor=128)
-j_o, j_i = s[out].split(j, factor=128)
-s[out].reorder(i_o, j_o, i_i, j_i)
-s[matmul].compute_at(s[out], j_o)
-i_o_j_o_fused = s[out].fuse(i_o, j_o)
-s[out].parallel(i_o_j_o_fused)
-s[matmul].pragma(i_o_o_o, &quot;auto_unroll_max_step&quot;, 64)
-s[matmul].pragma(i_o_o_o, &quot;unroll_explicit&quot;, True)
-s[matmul].vectorize(j_i)
+matmul_i, matmul_j, matmul_k = tuple(matmul.op.axis) + tuple(matmul.op.reduce_axis)
+out_i, out_j = tuple(out.op.axis) + tuple(out.op.reduce_axis)
+matmul_i_o_i, matmul_i_i = s[matmul].split(matmul_i, factor=8)
+matmul_i_o_o_i, matmul_i_o_i = s[matmul].split(matmul_i_o_i, factor=16)
+matmul_i_o_o_o, matmul_i_o_o_i = s[matmul].split(matmul_i_o_o_i, factor=1)
+matmul_j_o_i, matmul_j_i = s[matmul].split(matmul_j, factor=1)
+matmul_j_o_o_i, matmul_j_o_i = s[matmul].split(matmul_j_o_i, factor=128)
+matmul_j_o_o_o, matmul_j_o_o_i = s[matmul].split(matmul_j_o_o_i, factor=1)
+matmul_k_o, matmul_k_i = s[matmul].split(matmul_k, factor=4)
+s[matmul].reorder(matmul_i_o_o_o, matmul_j_o_o_o, matmul_i_o_o_i, matmul_j_o_o_i, matmul_k_o, matmul_i_o_i, matmul_j_o_i, matmul_k_i, matmul_i_i, matmul_j_i)
+out_i_o, out_i_i = s[out].split(out_i, factor=128)
+out_j_o, out_j_i = s[out].split(out_j, factor=128)
+s[out].reorder(out_i_o, out_j_o, out_i_i, out_j_i)
+s[matmul].compute_at(s[out], out_j_o)
+out_i_o_j_o_fused = s[out].fuse(out_i_o, out_j_o)
+s[out].parallel(out_i_o_j_o_fused)
+s[matmul].pragma(matmul_i_o_o_o, &quot;auto_unroll_max_step&quot;, 512)
+s[matmul].pragma(matmul_i_o_o_o, &quot;unroll_explicit&quot;, True)
+s[matmul].vectorize(matmul_j_i)
 </pre></div>
 </div>
 <p>A more complicated example is to resume the search.
@@ -508,7 +454,7 @@ This provides an isolation and avoids the conflict in the main thread/process.
 You can also use <a class="reference internal" href="../../api/python/auto_scheduler.html#tvm.auto_scheduler.LocalRPCMeasureContext" title="tvm.auto_scheduler.LocalRPCMeasureContext"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.LocalRPCMeasureContext</span></code></a> for auto-scheduler,
 as shown in the GPU tutorial (<a class="reference internal" href="tune_conv2d_layer_cuda.html#auto-scheduler-conv-gpu"><span class="std std-ref">Auto-scheduling a convolution layer for GPU</span></a>).</p>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  46.171 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  35.283 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorials-auto-scheduler-tune-matmul-x86-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/91b0339c8f3cc2594cee580dc450149a/tune_matmul_x86.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_matmul_x86.py</span></code></a></p>
diff --git a/docs/tutorials/autotvm/sg_execution_times.html b/docs/tutorials/autotvm/sg_execution_times.html
index d79677e..f6fb133 100644
--- a/docs/tutorials/autotvm/sg_execution_times.html
+++ b/docs/tutorials/autotvm/sg_execution_times.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Computation times &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Computation times &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -66,7 +66,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -192,14 +192,14 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-tutorials-autotvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>01:15.723</strong> total execution time for <strong>tutorials_autotvm</strong> files:</p>
+<p><strong>01:18.464</strong> total execution time for <strong>tutorials_autotvm</strong> files:</p>
 <ul class="simple">
-<li><p><strong>00:48.468</strong>: <a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-tutorials-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></li>
-<li><p><strong>00:26.623</strong>: <a class="reference internal" href="tune_simple_template.html#sphx-glr-tutorials-autotvm-tune-simple-template-py"><span class="std std-ref">Writing tunable template and Using auto-tuner</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_simple_template.py</span></code>)</p></li>
-<li><p><strong>00:00.184</strong>: <a class="reference internal" href="tune_relay_cuda.html#sphx-glr-tutorials-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a convolutional network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></li>
-<li><p><strong>00:00.157</strong>: <a class="reference internal" href="tune_relay_x86.html#sphx-glr-tutorials-autotvm-tune-relay-x86-py"><span class="std std-ref">Auto-tuning a convolutional network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_x86.py</span></code>)</p></li>
-<li><p><strong>00:00.147</strong>: <a class="reference internal" href="tune_relay_arm.html#sphx-glr-tutorials-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a convolutional network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></li>
-<li><p><strong>00:00.145</strong>: <a class="reference internal" href="tune_relay_mobile_gpu.html#sphx-glr-tutorials-autotvm-tune-relay-mobile-gpu-py"><span class="std std-ref">Auto-tuning a convolutional network for Mobile GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_mobile_gpu.py</span></code>)</p></li>
+<li><p><strong>00:54.966</strong>: <a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-tutorials-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></li>
+<li><p><strong>00:22.896</strong>: <a class="reference internal" href="tune_simple_template.html#sphx-glr-tutorials-autotvm-tune-simple-template-py"><span class="std std-ref">Writing tunable template and Using auto-tuner</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_simple_template.py</span></code>)</p></li>
+<li><p><strong>00:00.160</strong>: <a class="reference internal" href="tune_relay_cuda.html#sphx-glr-tutorials-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a convolutional network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></li>
+<li><p><strong>00:00.155</strong>: <a class="reference internal" href="tune_relay_x86.html#sphx-glr-tutorials-autotvm-tune-relay-x86-py"><span class="std std-ref">Auto-tuning a convolutional network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_x86.py</span></code>)</p></li>
+<li><p><strong>00:00.144</strong>: <a class="reference internal" href="tune_relay_mobile_gpu.html#sphx-glr-tutorials-autotvm-tune-relay-mobile-gpu-py"><span class="std std-ref">Auto-tuning a convolutional network for Mobile GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_mobile_gpu.py</span></code>)</p></li>
+<li><p><strong>00:00.143</strong>: <a class="reference internal" href="tune_relay_arm.html#sphx-glr-tutorials-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a convolutional network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/tutorials/autotvm/tune_conv2d_cuda.html b/docs/tutorials/autotvm/tune_conv2d_cuda.html
index f5a94e6..9ced3b5 100644
--- a/docs/tutorials/autotvm/tune_conv2d_cuda.html
+++ b/docs/tutorials/autotvm/tune_conv2d_cuda.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Tuning High Performance Convolution on NVIDIA GPUs &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Tuning High Performance Convolution on NVIDIA GPUs &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -399,26 +399,26 @@ for this template</p>
    7 unroll_explicit: OtherOption([0, 1]) len=2
 )
 Get devices for measurement successfully!
-No: 1   GFLOPS: 29.89/29.89     result: MeasureResult(costs=(0.007745029785714286,), error_no=0, all_cost=3.028876304626465, timestamp=1601335316.9973547)      [(&#39;tile_f&#39;, [-1, 32, 1, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7166780
-No: 2   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 3   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 4   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 5   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 6   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 7   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 8   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 9   GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 10  GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 11  GFLOPS: 0.00/29.89      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 12  GFLOPS: 49.33/49.33     result: MeasureResult(costs=(0.004693308181818182,), error_no=0, all_cost=3.1705777645111084, timestamp=1601335327.987154)      [(&#39;tile_f&#39;, [-1, 2, 8, 2]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2077980
-No: 13  GFLOPS: 0.00/49.33      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 14  GFLOPS: 73.80/73.80     result: MeasureResult(costs=(0.00313695178125,), error_no=0, all_cost=2.184481620788574, timestamp=1601335329.6290433)  [(&#39;tile_f&#39;, [-1, 2, 16, 8]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 16, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,8726459
-No: 15  GFLOPS: 27.60/73.80     result: MeasureResult(costs=(0.008387928916666667,), error_no=0, all_cost=2.155811071395874, timestamp=1601335330.810771)       [(&#39;tile_f&#39;, [-1, 1, 2, 64]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 1, 8]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5905444
-No: 16  GFLOPS: 1.61/73.80      result: MeasureResult(costs=(0.143430149,), error_no=0, all_cost=4.961726427078247, timestamp=1601335333.5001879)       [(&#39;tile_f&#39;, [-1, 2, 8, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7428895
-No: 17  GFLOPS: 0.00/73.80      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68c287) [0x7f169a728287]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f169a727d26]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
-No: 18  GFLOPS: 0.00/73.80      result: MeasureResult(costs=(RuntimeError(&#39;Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (4) /workspace/build/libtvm.so(+0x11df8a2) [0x7f169b27b8a2]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f169b27ce9b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::Call [...]
-No: 19  GFLOPS: 23.76/73.80     result: MeasureResult(costs=(0.009743294818181819,), error_no=0, all_cost=1.9935996532440186, timestamp=1601335342.6303432)     [(&#39;tile_f&#39;, [-1, 2, 1, 32]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,782066
-No: 20  GFLOPS: 0.00/73.80      result: MeasureResult(costs=(RuntimeError(&#39;Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f169b245ca1]\n  [bt] (4) /workspace/build/libtvm.so(+0x11df8a2) [0x7f169b27b8a2]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f169b27ce9b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::Call [...]
+No: 1   GFLOPS: 36.63/36.63     result: MeasureResult(costs=(0.006320022761904762,), error_no=0, all_cost=3.6816139221191406, timestamp=1601668684.1177537)     [(&#39;tile_f&#39;, [-1, 32, 1, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7166780
+No: 2   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 3   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 4   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 5   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 6   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 7   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 8   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 9   GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 10  GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 11  GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 12  GFLOPS: 0.00/36.63      result: MeasureResult(costs=(RuntimeError(&#39;Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMArrayAlloc+0xe2) [0x7f8cae07a9d2]\n  [bt] (4) /workspace/build/libtvm.so(tvm::runtime::NDArray::Empty(std::vector&lt;long, std::allocator&lt;long&gt; &gt;, DLDataType, DLContext)+0x206) [0x7f8cae07a836]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCDeviceAPI::AllocDataSpace(DLContext, unsigned long, unsigned long, DLDataType [...]
+No: 13  GFLOPS: 0.00/36.63      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 14  GFLOPS: 83.32/83.32     result: MeasureResult(costs=(0.0027784159811320755,), error_no=0, all_cost=1.993711233139038, timestamp=1601668696.6355245)     [(&#39;tile_f&#39;, [-1, 2, 16, 8]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 16, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,8726459
+No: 15  GFLOPS: 35.61/83.32     result: MeasureResult(costs=(0.0065010153125,), error_no=0, all_cost=1.6496946811676025, timestamp=1601668697.651509)   [(&#39;tile_f&#39;, [-1, 1, 2, 64]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 1, 8]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5905444
+No: 16  GFLOPS: 0.00/83.32      result: MeasureResult(costs=(RuntimeError(&#39;Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (4) /workspace/build/libtvm.so(+0x11dd342) [0x7f8cae0e1342]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f8cae0e292b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::Call [...]
+No: 17  GFLOPS: 0.00/83.32      result: MeasureResult(costs=(InstantiationError(&#39;Traceback (most recent call last):\n  [bt] (4) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (3) /workspace/build/libtvm.so(+0x68ccf7) [0x7f8cad590cf7]\n  [bt] (2) /workspace/build/libtvm.so(tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const+0x3e6) [0x7f8cad590796]\n  [bt] (1) /workspace/build/libtvm.so(tvm::tir::transform::Prim [...]
+No: 18  GFLOPS: 0.00/83.32      result: MeasureResult(costs=(RuntimeError(&#39;Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (4) /workspace/build/libtvm.so(+0x11dd342) [0x7f8cae0e1342]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f8cae0e292b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::Call [...]
+No: 19  GFLOPS: 29.61/83.32     result: MeasureResult(costs=(0.00781925976923077,), error_no=0, all_cost=1.381317377090454, timestamp=1601668714.405927)        [(&#39;tile_f&#39;, [-1, 2, 1, 32]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,782066
+No: 20  GFLOPS: 0.00/83.32      result: MeasureResult(costs=(RuntimeError(&#39;Traceback (most recent call last):\n  [bt] (5) /workspace/build/libtvm.so(TVMFuncCall+0x61) [0x7f8cae0abdb1]\n  [bt] (4) /workspace/build/libtvm.so(+0x11dd342) [0x7f8cae0e1342]\n  [bt] (3) /workspace/build/libtvm.so(tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const+0x26b) [0x7f8cae0e292b]\n  [bt] (2) /workspace/build/libtvm.so(tvm::runtime::RPCClientSession::Call [...]
 </pre></div>
 </div>
 <p>Finally we can inspect the best config from log file, check correctness,
@@ -457,7 +457,7 @@ and measure running time.</p>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Best config:
 [(&#39;tile_f&#39;, [-1, 2, 16, 8]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 16, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,8726459
-Time cost of this operator: 0.003474
+Time cost of this operator: 0.002609
 </pre></div>
 </div>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorials-autotvm-tune-conv2d-cuda-py">
diff --git a/docs/tutorials/autotvm/tune_relay_arm.html b/docs/tutorials/autotvm/tune_relay_arm.html
index deaa242..372d0bf 100644
--- a/docs/tutorials/autotvm/tune_relay_arm.html
+++ b/docs/tutorials/autotvm/tune_relay_arm.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Auto-tuning a convolutional network for ARM CPU &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Auto-tuning a convolutional network for ARM CPU &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/autotvm/tune_relay_cuda.html b/docs/tutorials/autotvm/tune_relay_cuda.html
index f2c2bcd..59777e1 100644
--- a/docs/tutorials/autotvm/tune_relay_cuda.html
+++ b/docs/tutorials/autotvm/tune_relay_cuda.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Auto-tuning a convolutional network for NVIDIA GPU &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Auto-tuning a convolutional network for NVIDIA GPU &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/autotvm/tune_relay_mobile_gpu.html b/docs/tutorials/autotvm/tune_relay_mobile_gpu.html
index 26b93d0..e111147 100644
--- a/docs/tutorials/autotvm/tune_relay_mobile_gpu.html
+++ b/docs/tutorials/autotvm/tune_relay_mobile_gpu.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Auto-tuning a convolutional network for Mobile GPU &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Auto-tuning a convolutional network for Mobile GPU &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/autotvm/tune_relay_x86.html b/docs/tutorials/autotvm/tune_relay_x86.html
index f3d0535..e911953 100644
--- a/docs/tutorials/autotvm/tune_relay_x86.html
+++ b/docs/tutorials/autotvm/tune_relay_x86.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Auto-tuning a convolutional network for x86 CPU &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Auto-tuning a convolutional network for x86 CPU &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/autotvm/tune_simple_template.html b/docs/tutorials/autotvm/tune_simple_template.html
index 5d0fa98..8439f29 100644
--- a/docs/tutorials/autotvm/tune_simple_template.html
+++ b/docs/tutorials/autotvm/tune_simple_template.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Writing tunable template and Using auto-tuner &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Writing tunable template and Using auto-tuner &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -492,16 +492,16 @@ used to get the best config later.</p>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Get devices for measurement successfully!
-No: 1   GFLOPS: 9.95/9.95       result: MeasureResult(costs=(0.0269896166,), error_no=0, all_cost=0.985429048538208, timestamp=1601335288.7511094)      [(&#39;tile_y&#39;, [-1, 8]), (&#39;tile_x&#39;, [-1, 32])],None,53
-No: 2   GFLOPS: 12.65/12.65     result: MeasureResult(costs=(0.021222210999999998,), error_no=0, all_cost=1.316232442855835, timestamp=1601335289.859437)       [(&#39;tile_y&#39;, [-1, 128]), (&#39;tile_x&#39;, [-1, 256])],None,87
-No: 3   GFLOPS: 15.51/15.51     result: MeasureResult(costs=(0.0173104734,), error_no=0, all_cost=1.3434505462646484, timestamp=1601335290.9338927)     [(&#39;tile_y&#39;, [-1, 8]), (&#39;tile_x&#39;, [-1, 512])],None,93
-No: 4   GFLOPS: 13.09/15.51     result: MeasureResult(costs=(0.0205033932,), error_no=0, all_cost=1.124967098236084, timestamp=1601335292.0556455)      [(&#39;tile_y&#39;, [-1, 128]), (&#39;tile_x&#39;, [-1, 512])],None,97
-No: 5   GFLOPS: 2.09/15.51      result: MeasureResult(costs=(0.1285110386,), error_no=0, all_cost=2.9166505336761475, timestamp=1601335294.8791656)     [(&#39;tile_y&#39;, [-1, 256]), (&#39;tile_x&#39;, [-1, 4])],None,28
-No: 6   GFLOPS: 9.10/15.51      result: MeasureResult(costs=(0.0294933192,), error_no=0, all_cost=1.184211254119873, timestamp=1601335296.1353078)      [(&#39;tile_y&#39;, [-1, 4]), (&#39;tile_x&#39;, [-1, 32])],None,52
-No: 7   GFLOPS: 12.65/15.51     result: MeasureResult(costs=(0.0212155018,), error_no=0, all_cost=1.0387556552886963, timestamp=1601335297.3753889)     [(&#39;tile_y&#39;, [-1, 2]), (&#39;tile_x&#39;, [-1, 512])],None,91
-No: 8   GFLOPS: 10.79/15.51     result: MeasureResult(costs=(0.024885154200000002,), error_no=0, all_cost=1.033494234085083, timestamp=1601335298.574376)       [(&#39;tile_y&#39;, [-1, 2]), (&#39;tile_x&#39;, [-1, 256])],None,81
-No: 9   GFLOPS: 0.92/15.51      result: MeasureResult(costs=(0.2920148466,), error_no=0, all_cost=5.719310760498047, timestamp=1601335305.6552417)      [(&#39;tile_y&#39;, [-1, 128]), (&#39;tile_x&#39;, [-1, 2])],None,17
-No: 10  GFLOPS: 1.18/15.51      result: MeasureResult(costs=(0.2275096714,), error_no=0, all_cost=4.696672439575195, timestamp=1601335310.067456)       [(&#39;tile_y&#39;, [-1, 1]), (&#39;tile_x&#39;, [-1, 2])],None,10
+No: 1   GFLOPS: 4.53/4.53       result: MeasureResult(costs=(0.0592549474,), error_no=0, all_cost=1.813521385192871, timestamp=1601668657.3705232)      [(&#39;tile_y&#39;, [-1, 8]), (&#39;tile_x&#39;, [-1, 32])],None,53
+No: 2   GFLOPS: 16.86/16.86     result: MeasureResult(costs=(0.0159222514,), error_no=0, all_cost=1.2602179050445557, timestamp=1601668658.325927)      [(&#39;tile_y&#39;, [-1, 128]), (&#39;tile_x&#39;, [-1, 256])],None,87
+No: 3   GFLOPS: 13.18/16.86     result: MeasureResult(costs=(0.0203596778,), error_no=0, all_cost=1.2592532634735107, timestamp=1601668659.301608)      [(&#39;tile_y&#39;, [-1, 8]), (&#39;tile_x&#39;, [-1, 512])],None,93
+No: 4   GFLOPS: 12.46/16.86     result: MeasureResult(costs=(0.0215495506,), error_no=0, all_cost=1.2881958484649658, timestamp=1601668660.275171)      [(&#39;tile_y&#39;, [-1, 128]), (&#39;tile_x&#39;, [-1, 512])],None,97
+No: 5   GFLOPS: 2.87/16.86      result: MeasureResult(costs=(0.0934988552,), error_no=0, all_cost=2.17838454246521, timestamp=1601668662.29342) [(&#39;tile_y&#39;, [-1, 256]), (&#39;tile_x&#39;, [-1, 4])],None,28
+No: 6   GFLOPS: 5.08/16.86      result: MeasureResult(costs=(0.0528449238,), error_no=0, all_cost=1.7616806030273438, timestamp=1601668663.805897)      [(&#39;tile_y&#39;, [-1, 4]), (&#39;tile_x&#39;, [-1, 32])],None,52
+No: 7   GFLOPS: 10.76/16.86     result: MeasureResult(costs=(0.024955729,), error_no=0, all_cost=1.1567623615264893, timestamp=1601668664.8381867)      [(&#39;tile_y&#39;, [-1, 2]), (&#39;tile_x&#39;, [-1, 512])],None,91
+No: 8   GFLOPS: 13.48/16.86     result: MeasureResult(costs=(0.0199137338,), error_no=0, all_cost=1.1330161094665527, timestamp=1601668665.7802334)     [(&#39;tile_y&#39;, [-1, 2]), (&#39;tile_x&#39;, [-1, 256])],None,81
+No: 9   GFLOPS: 1.33/16.86      result: MeasureResult(costs=(0.2017065944,), error_no=0, all_cost=4.308288097381592, timestamp=1601668669.9560215)      [(&#39;tile_y&#39;, [-1, 128]), (&#39;tile_x&#39;, [-1, 2])],None,17
+No: 10  GFLOPS: 1.37/16.86      result: MeasureResult(costs=(0.1966458516,), error_no=0, all_cost=3.632107973098755, timestamp=1601668673.6803129)      [(&#39;tile_y&#39;, [-1, 1]), (&#39;tile_x&#39;, [-1, 2])],None,10
 </pre></div>
 </div>
 <p>Finally we apply history best from the cache file and check its correctness.
diff --git a/docs/tutorials/dev/bring_your_own_datatypes.html b/docs/tutorials/dev/bring_your_own_datatypes.html
index 22ca708..c354ff5 100644
--- a/docs/tutorials/dev/bring_your_own_datatypes.html
+++ b/docs/tutorials/dev/bring_your_own_datatypes.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Bring Your Own Datatypes to TVM &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Bring Your Own Datatypes to TVM &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/dev/low_level_custom_pass.html b/docs/tutorials/dev/low_level_custom_pass.html
index 6c8001e..baaf4e8 100644
--- a/docs/tutorials/dev/low_level_custom_pass.html
+++ b/docs/tutorials/dev/low_level_custom_pass.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Writing a Customized Pass &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Writing a Customized Pass &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -254,8 +254,8 @@ our customized lowering pass to manipulate the IR directly instead of using sche
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>primfn(a_1: handle, b_1: handle, c_1: handle) -&gt; ()
   attr = {&quot;global_symbol&quot;: &quot;main&quot;, &quot;tir.noalias&quot;: True}
-  buffers = {b: Buffer(b_2: Pointer(float32), float32, [128], []),
-             c: Buffer(c_2: Pointer(float32), float32, [128], []),
+  buffers = {c: Buffer(c_2: Pointer(float32), float32, [128], []),
+             b: Buffer(b_2: Pointer(float32), float32, [128], []),
              a: Buffer(a_2: Pointer(float32), float32, [128], [])}
   buffer_map = {a_1: a, b_1: b, c_1: c} {
   for (i: int32, 0, 128) {
diff --git a/docs/tutorials/dev/sg_execution_times.html b/docs/tutorials/dev/sg_execution_times.html
index 7b3d30e..7c57bcf 100644
--- a/docs/tutorials/dev/sg_execution_times.html
+++ b/docs/tutorials/dev/sg_execution_times.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Computation times &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Computation times &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -66,7 +66,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -192,11 +192,11 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-tutorials-dev-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:32.460</strong> total execution time for <strong>tutorials_dev</strong> files:</p>
+<p><strong>00:29.319</strong> total execution time for <strong>tutorials_dev</strong> files:</p>
 <ul class="simple">
-<li><p><strong>00:31.830</strong>: <a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-tutorials-dev-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></li>
-<li><p><strong>00:00.442</strong>: <a class="reference internal" href="use_pass_infra.html#sphx-glr-tutorials-dev-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></li>
-<li><p><strong>00:00.189</strong>: <a class="reference internal" href="low_level_custom_pass.html#sphx-glr-tutorials-dev-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></li>
+<li><p><strong>00:28.796</strong>: <a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-tutorials-dev-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></li>
+<li><p><strong>00:00.362</strong>: <a class="reference internal" href="use_pass_infra.html#sphx-glr-tutorials-dev-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></li>
+<li><p><strong>00:00.161</strong>: <a class="reference internal" href="low_level_custom_pass.html#sphx-glr-tutorials-dev-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/tutorials/dev/use_pass_infra.html b/docs/tutorials/dev/use_pass_infra.html
index 0d8e467..2bdc498 100644
--- a/docs/tutorials/dev/use_pass_infra.html
+++ b/docs/tutorials/dev/use_pass_infra.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>How to Use TVM Pass Infra &mdash; tvm 0.7.dev1 documentation</title>
+  <title>How to Use TVM Pass Infra &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/build_gcn.html b/docs/tutorials/frontend/build_gcn.html
index 7b93971..457ff7c 100644
--- a/docs/tutorials/frontend/build_gcn.html
+++ b/docs/tutorials/frontend/build_gcn.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Building a Graph Convolutional Network &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Building a Graph Convolutional Network &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/deploy_model_on_android.html b/docs/tutorials/frontend/deploy_model_on_android.html
index ba8a667..dd78b8f 100644
--- a/docs/tutorials/frontend/deploy_model_on_android.html
+++ b/docs/tutorials/frontend/deploy_model_on_android.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Deploy the Pretrained Model on Android &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Deploy the Pretrained Model on Android &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -533,7 +533,7 @@ to the remote android device.</p>
 <p class="sphx-glr-script-out">Out:</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>TVM prediction top-1: tiger cat
 Evaluate inference time cost...
-Mean inference time (std dev): 25.35 ms (0.05 ms)
+Mean inference time (std dev): 16.55 ms (1.74 ms)
 </pre></div>
 </div>
 </div>
diff --git a/docs/tutorials/frontend/deploy_model_on_rasp.html b/docs/tutorials/frontend/deploy_model_on_rasp.html
index 938da56..e70aae2 100644
--- a/docs/tutorials/frontend/deploy_model_on_rasp.html
+++ b/docs/tutorials/frontend/deploy_model_on_rasp.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Deploy the Pretrained Model on Raspberry Pi &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Deploy the Pretrained Model on Raspberry Pi &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/deploy_object_detection_pytorch.html b/docs/tutorials/frontend/deploy_object_detection_pytorch.html
index bf55305..e471083 100644
--- a/docs/tutorials/frontend/deploy_object_detection_pytorch.html
+++ b/docs/tutorials/frontend/deploy_object_detection_pytorch.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Compile PyTorch Object Detection Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Compile PyTorch Object Detection Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -377,7 +377,7 @@ torchvision rcnn models.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Get 9 valid boxes
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  50.257 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  35.114 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorials-frontend-deploy-object-detection-pytorch-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/ec94e7a109437cf90cddcc60a7b5aaea/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/tutorials/frontend/deploy_prequantized.html b/docs/tutorials/frontend/deploy_prequantized.html
index 0099dfc..ee7c8b3 100644
--- a/docs/tutorials/frontend/deploy_prequantized.html
+++ b/docs/tutorials/frontend/deploy_prequantized.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Deploy a Framework-prequantized Model with TVM &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Deploy a Framework-prequantized Model with TVM &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -418,7 +418,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>196 in 1000 raw floating outputs identical.
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>188 in 1000 raw floating outputs identical.
 </pre></div>
 </div>
 </div>
@@ -433,7 +433,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Elapsed average ms: 20.04376112
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Elapsed average ms: 21.65541768
 </pre></div>
 </div>
 <div class="admonition note">
diff --git a/docs/tutorials/frontend/deploy_prequantized_tflite.html b/docs/tutorials/frontend/deploy_prequantized_tflite.html
index ad90020..a234afd 100644
--- a/docs/tutorials/frontend/deploy_prequantized_tflite.html
+++ b/docs/tutorials/frontend/deploy_prequantized_tflite.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite) &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite) &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -445,7 +445,7 @@ TFLite Top-5 labels: [387 102 386 341 880]
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Elapsed average ms: 36.152443829999996
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>Elapsed average ms: 33.15524846
 </pre></div>
 </div>
 <div class="admonition note">
@@ -472,7 +472,7 @@ device and follow <a class="reference external" href="https://tvm.apache.org/doc
 </ul>
 </div></blockquote>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  36.780 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  16.810 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorials-frontend-deploy-prequantized-tflite-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/5c443f88ea44ce77c5ccade429af6e74/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/tutorials/frontend/deploy_quantized.html b/docs/tutorials/frontend/deploy_quantized.html
index ef8957f..f986ce7 100644
--- a/docs/tutorials/frontend/deploy_quantized.html
+++ b/docs/tutorials/frontend/deploy_quantized.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Deploy a Quantized Model on Cuda &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Deploy a Quantized Model on Cuda &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/deploy_sparse.html b/docs/tutorials/frontend/deploy_sparse.html
index ba8af21..0495580 100644
--- a/docs/tutorials/frontend/deploy_sparse.html
+++ b/docs/tutorials/frontend/deploy_sparse.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Deploy a Hugging Face Pruned Model on CPU &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Deploy a Hugging Face Pruned Model on CPU &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/deploy_ssd_gluoncv.html b/docs/tutorials/frontend/deploy_ssd_gluoncv.html
index 6e17fe2..c188a45 100644
--- a/docs/tutorials/frontend/deploy_ssd_gluoncv.html
+++ b/docs/tutorials/frontend/deploy_ssd_gluoncv.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Deploy Single Shot Multibox Detector(SSD) model &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Deploy Single Shot Multibox Detector(SSD) model &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -453,7 +453,7 @@ Cannot find config for target=cuda -keys=cuda,gpu -max_num_threads=1024 -thread_
 </pre></div>
 </div>
 <img alt="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" class="sphx-glr-single-img" src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" />
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  53.859 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  26.063 seconds)</p>
 <div class="sphx-glr-footer class sphx-glr-footer-example docutils container" id="sphx-glr-download-tutorials-frontend-deploy-ssd-gluoncv-py">
 <div class="sphx-glr-download docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/ca08de6c440df207921d807474d26f06/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/tutorials/frontend/from_caffe2.html b/docs/tutorials/frontend/from_caffe2.html
index 9402d76..9e3436e 100644
--- a/docs/tutorials/frontend/from_caffe2.html
+++ b/docs/tutorials/frontend/from_caffe2.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Compile Caffe2 Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Compile Caffe2 Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/from_coreml.html b/docs/tutorials/frontend/from_coreml.html
index db799e7..9f9db4d 100644
--- a/docs/tutorials/frontend/from_coreml.html
+++ b/docs/tutorials/frontend/from_coreml.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Compile CoreML Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Compile CoreML Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/from_darknet.html b/docs/tutorials/frontend/from_darknet.html
index 3b8209e..a20640e 100644
--- a/docs/tutorials/frontend/from_darknet.html
+++ b/docs/tutorials/frontend/from_darknet.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Compile YOLO-V2 and YOLO-V3 in DarkNet Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Compile YOLO-V2 and YOLO-V3 in DarkNet Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/from_keras.html b/docs/tutorials/frontend/from_keras.html
index a152018..d0b7258 100644
--- a/docs/tutorials/frontend/from_keras.html
+++ b/docs/tutorials/frontend/from_keras.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Compile Keras Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Compile Keras Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/from_mxnet.html b/docs/tutorials/frontend/from_mxnet.html
index e83dbdc..d145ee9 100644
--- a/docs/tutorials/frontend/from_mxnet.html
+++ b/docs/tutorials/frontend/from_mxnet.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Compile MXNet Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Compile MXNet Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/from_onnx.html b/docs/tutorials/frontend/from_onnx.html
index 573d803..55e9184 100644
--- a/docs/tutorials/frontend/from_onnx.html
+++ b/docs/tutorials/frontend/from_onnx.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Compile ONNX Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Compile ONNX Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -313,9 +313,9 @@ we skip the pytorch model construction part, and download the saved onnx model</
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>...47%, 0.01 MB, 39 KB/s, 0 seconds passed
-...94%, 0.02 MB, 76 KB/s, 0 seconds passed
-...100%, 0.02 MB, 115 KB/s, 0 seconds passed
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>...47%, 0.01 MB, 93 KB/s, 0 seconds passed
+...94%, 0.02 MB, 186 KB/s, 0 seconds passed
+...100%, 0.02 MB, 278 KB/s, 0 seconds passed
 Cannot find config for target=llvm -keys=cpu, workload=(&#39;conv2d_NCHWc.x86&#39;, (&#39;TENSOR&#39;, (1, 32, 224, 224), &#39;float32&#39;), (&#39;TENSOR&#39;, (9, 32, 3, 3), &#39;float32&#39;), (1, 1), (1, 1, 1, 1), (1, 1), &#39;NCHW&#39;, &#39;NCHW&#39;, &#39;float32&#39;). A fallback configuration is used, which may bring great performance regression.
 Cannot find config for target=llvm -keys=cpu, workload=(&#39;conv2d_NCHWc.x86&#39;, (&#39;TENSOR&#39;, (1, 64, 224, 224), &#39;float32&#39;), (&#39;TENSOR&#39;, (32, 64, 3, 3), &#39;float32&#39;), (1, 1), (1, 1, 1, 1), (1, 1), &#39;NCHW&#39;, &#39;NCHW&#39;, &#39;float32&#39;). A fallback configuration is used, which may bring great performance regression.
 Cannot find config for target=llvm -keys=cpu, workload=(&#39;conv2d_NCHWc.x86&#39;, (&#39;TENSOR&#39;, (1, 1, 224, 224), &#39;float32&#39;), (&#39;TENSOR&#39;, (64, 1, 5, 5), &#39;float32&#39;), (1, 1), (2, 2, 2, 2), (1, 1), &#39;NCHW&#39;, &#39;NCHW&#39;, &#39;float32&#39;). A fallback configuration is used, which may bring great performance regression.
diff --git a/docs/tutorials/frontend/from_pytorch.html b/docs/tutorials/frontend/from_pytorch.html
index 11be39c..13042f5 100644
--- a/docs/tutorials/frontend/from_pytorch.html
+++ b/docs/tutorials/frontend/from_pytorch.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Compile PyTorch Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Compile PyTorch Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/from_tensorflow.html b/docs/tutorials/frontend/from_tensorflow.html
index 703c801..56f5139 100644
--- a/docs/tutorials/frontend/from_tensorflow.html
+++ b/docs/tutorials/frontend/from_tensorflow.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Compile Tensorflow Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Compile Tensorflow Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/from_tflite.html b/docs/tutorials/frontend/from_tflite.html
index 951ad7c..5f0ff7e 100644
--- a/docs/tutorials/frontend/from_tflite.html
+++ b/docs/tutorials/frontend/from_tflite.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Compile TFLite Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Compile TFLite Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/frontend/sg_execution_times.html b/docs/tutorials/frontend/sg_execution_times.html
index c6f846b..23a078b 100644
--- a/docs/tutorials/frontend/sg_execution_times.html
+++ b/docs/tutorials/frontend/sg_execution_times.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Computation times &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Computation times &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -66,7 +66,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -192,27 +192,27 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-tutorials-frontend-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>10:37.661</strong> total execution time for <strong>tutorials_frontend</strong> files:</p>
+<p><strong>08:58.956</strong> total execution time for <strong>tutorials_frontend</strong> files:</p>
 <ul class="simple">
-<li><p><strong>02:36.780</strong>: <a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-tutorials-frontend-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></li>
-<li><p><strong>01:53.859</strong>: <a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-tutorials-frontend-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></li>
-<li><p><strong>01:50.257</strong>: <a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-tutorials-frontend-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></li>
-<li><p><strong>00:39.325</strong>: <a class="reference internal" href="deploy_prequantized.html#sphx-glr-tutorials-frontend-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></li>
-<li><p><strong>00:37.911</strong>: <a class="reference internal" href="from_tensorflow.html#sphx-glr-tutorials-frontend-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></li>
-<li><p><strong>00:31.934</strong>: <a class="reference internal" href="deploy_quantized.html#sphx-glr-tutorials-frontend-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></li>
-<li><p><strong>00:26.331</strong>: <a class="reference internal" href="from_tflite.html#sphx-glr-tutorials-frontend-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></li>
-<li><p><strong>00:22.983</strong>: <a class="reference internal" href="from_darknet.html#sphx-glr-tutorials-frontend-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></li>
-<li><p><strong>00:16.754</strong>: <a class="reference internal" href="from_caffe2.html#sphx-glr-tutorials-frontend-from-caffe2-py"><span class="std std-ref">Compile Caffe2 Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_caffe2.py</span></code>)</p></li>
-<li><p><strong>00:15.161</strong>: <a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-tutorials-frontend-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></li>
-<li><p><strong>00:14.067</strong>: <a class="reference internal" href="deploy_model_on_android.html#sphx-glr-tutorials-frontend-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></li>
-<li><p><strong>00:11.641</strong>: <a class="reference internal" href="from_keras.html#sphx-glr-tutorials-frontend-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></li>
-<li><p><strong>00:11.624</strong>: <a class="reference internal" href="from_pytorch.html#sphx-glr-tutorials-frontend-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></li>
-<li><p><strong>00:09.558</strong>: <a class="reference internal" href="from_coreml.html#sphx-glr-tutorials-frontend-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></li>
-<li><p><strong>00:08.824</strong>: <a class="reference internal" href="from_mxnet.html#sphx-glr-tutorials-frontend-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></li>
-<li><p><strong>00:05.608</strong>: <a class="reference internal" href="build_gcn.html#sphx-glr-tutorials-frontend-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></li>
-<li><p><strong>00:02.880</strong>: <a class="reference internal" href="using_external_lib.html#sphx-glr-tutorials-frontend-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></li>
-<li><p><strong>00:01.993</strong>: <a class="reference internal" href="from_onnx.html#sphx-glr-tutorials-frontend-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></li>
-<li><p><strong>00:00.173</strong>: <a class="reference internal" href="deploy_sparse.html#sphx-glr-tutorials-frontend-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></li>
+<li><p><strong>02:16.810</strong>: <a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-tutorials-frontend-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></li>
+<li><p><strong>01:35.114</strong>: <a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-tutorials-frontend-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></li>
+<li><p><strong>01:26.063</strong>: <a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-tutorials-frontend-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></li>
+<li><p><strong>00:36.010</strong>: <a class="reference internal" href="deploy_prequantized.html#sphx-glr-tutorials-frontend-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></li>
+<li><p><strong>00:29.040</strong>: <a class="reference internal" href="from_tensorflow.html#sphx-glr-tutorials-frontend-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></li>
+<li><p><strong>00:24.832</strong>: <a class="reference internal" href="deploy_quantized.html#sphx-glr-tutorials-frontend-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></li>
+<li><p><strong>00:21.702</strong>: <a class="reference internal" href="from_tflite.html#sphx-glr-tutorials-frontend-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></li>
+<li><p><strong>00:20.680</strong>: <a class="reference internal" href="from_darknet.html#sphx-glr-tutorials-frontend-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></li>
+<li><p><strong>00:16.347</strong>: <a class="reference internal" href="from_caffe2.html#sphx-glr-tutorials-frontend-from-caffe2-py"><span class="std std-ref">Compile Caffe2 Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_caffe2.py</span></code>)</p></li>
+<li><p><strong>00:13.192</strong>: <a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-tutorials-frontend-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></li>
+<li><p><strong>00:12.357</strong>: <a class="reference internal" href="deploy_model_on_android.html#sphx-glr-tutorials-frontend-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></li>
+<li><p><strong>00:10.381</strong>: <a class="reference internal" href="from_pytorch.html#sphx-glr-tutorials-frontend-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></li>
+<li><p><strong>00:10.004</strong>: <a class="reference internal" href="from_keras.html#sphx-glr-tutorials-frontend-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></li>
+<li><p><strong>00:09.348</strong>: <a class="reference internal" href="from_coreml.html#sphx-glr-tutorials-frontend-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></li>
+<li><p><strong>00:08.209</strong>: <a class="reference internal" href="from_mxnet.html#sphx-glr-tutorials-frontend-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></li>
+<li><p><strong>00:04.892</strong>: <a class="reference internal" href="build_gcn.html#sphx-glr-tutorials-frontend-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></li>
+<li><p><strong>00:02.176</strong>: <a class="reference internal" href="using_external_lib.html#sphx-glr-tutorials-frontend-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></li>
+<li><p><strong>00:01.644</strong>: <a class="reference internal" href="from_onnx.html#sphx-glr-tutorials-frontend-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></li>
+<li><p><strong>00:00.155</strong>: <a class="reference internal" href="deploy_sparse.html#sphx-glr-tutorials-frontend-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/tutorials/frontend/using_external_lib.html b/docs/tutorials/frontend/using_external_lib.html
index 5be93ff..a57b8e7 100644
--- a/docs/tutorials/frontend/using_external_lib.html
+++ b/docs/tutorials/frontend/using_external_lib.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Using External Libraries in Relay &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Using External Libraries in Relay &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
diff --git a/docs/tutorials/get_started/cross_compilation_and_rpc.html b/docs/tutorials/get_started/cross_compilation_and_rpc.html
index bd12475..f793c4d 100644
--- a/docs/tutorials/get_started/cross_compilation_and_rpc.html
+++ b/docs/tutorials/get_started/cross_compilation_and_rpc.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Cross Compilation and RPC &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Cross Compilation and RPC &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -102,6 +102,7 @@
 <p class="caption"><span class="caption-text">Tutorials</span></p>
 <ul class="current">
 <li class="toctree-l1 current"><a class="reference internal" href="../index.html">Get Started Tutorials</a><ul class="current">
+<li class="toctree-l2"><a class="reference internal" href="tvmc_command_line_driver.html">Getting Started with TVM command line driver - TVMC</a></li>
 <li class="toctree-l2"><a class="reference internal" href="relay_quick_start.html">Quick Start Tutorial for Compiling Deep Learning Models</a></li>
 <li class="toctree-l2 current"><a class="current reference internal" href="#">Cross Compilation and RPC</a><ul>
 <li class="toctree-l3"><a class="reference internal" href="#build-tvm-runtime-on-device">Build TVM Runtime on Device</a></li>
@@ -378,7 +379,7 @@ device and returns the measured cost. Network overhead is excluded.</p>
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>1.213e-07 secs/op
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>1.794e-07 secs/op
 </pre></div>
 </div>
 </div>
diff --git a/docs/tutorials/get_started/relay_quick_start.html b/docs/tutorials/get_started/relay_quick_start.html
index f20f53a..d4ac6b6 100644
--- a/docs/tutorials/get_started/relay_quick_start.html
+++ b/docs/tutorials/get_started/relay_quick_start.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Quick Start Tutorial for Compiling Deep Learning Models &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Quick Start Tutorial for Compiling Deep Learning Models &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -41,7 +41,7 @@
     <link rel="index" title="Index" href="../../genindex.html" />
     <link rel="search" title="Search" href="../../search.html" />
     <link rel="next" title="Cross Compilation and RPC" href="cross_compilation_and_rpc.html" />
-    <link rel="prev" title="Get Started Tutorials" href="../index.html" /> 
+    <link rel="prev" title="Getting Started with TVM command line driver - TVMC" href="tvmc_command_line_driver.html" /> 
 </head>
 
 <body class="wy-body-for-nav">
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -102,6 +102,7 @@
 <p class="caption"><span class="caption-text">Tutorials</span></p>
 <ul class="current">
 <li class="toctree-l1 current"><a class="reference internal" href="../index.html">Get Started Tutorials</a><ul class="current">
+<li class="toctree-l2"><a class="reference internal" href="tvmc_command_line_driver.html">Getting Started with TVM command line driver - TVMC</a></li>
 <li class="toctree-l2 current"><a class="current reference internal" href="#">Quick Start Tutorial for Compiling Deep Learning Models</a><ul>
 <li class="toctree-l3"><a class="reference internal" href="#overview-for-supported-hardware-backend-of-tvm">Overview for Supported Hardware Backend of TVM</a></li>
 <li class="toctree-l3"><a class="reference internal" href="#define-neural-network-in-relay">Define Neural Network in Relay</a></li>
@@ -381,57 +382,57 @@ in this example. Then the machine code will be generated as the module library.<
 </pre></div>
 </div>
 <p class="sphx-glr-script-out">Out:</p>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>...1%, 0.01 MB, 68 KB/s, 0 seconds passed
-...3%, 0.02 MB, 127 KB/s, 0 seconds passed
-...5%, 0.02 MB, 190 KB/s, 0 seconds passed
-...7%, 0.03 MB, 251 KB/s, 0 seconds passed
-...9%, 0.04 MB, 312 KB/s, 0 seconds passed
-...11%, 0.05 MB, 355 KB/s, 0 seconds passed
-...13%, 0.05 MB, 411 KB/s, 0 seconds passed
-...15%, 0.06 MB, 467 KB/s, 0 seconds passed
-...17%, 0.07 MB, 525 KB/s, 0 seconds passed
-...19%, 0.08 MB, 580 KB/s, 0 seconds passed
-...21%, 0.09 MB, 634 KB/s, 0 seconds passed
-...23%, 0.09 MB, 670 KB/s, 0 seconds passed
-...25%, 0.10 MB, 721 KB/s, 0 seconds passed
-...27%, 0.11 MB, 775 KB/s, 0 seconds passed
-...29%, 0.12 MB, 830 KB/s, 0 seconds passed
-...31%, 0.12 MB, 879 KB/s, 0 seconds passed
-...33%, 0.13 MB, 934 KB/s, 0 seconds passed
-...35%, 0.14 MB, 983 KB/s, 0 seconds passed
-...37%, 0.15 MB, 1037 KB/s, 0 seconds passed
-...39%, 0.16 MB, 1086 KB/s, 0 seconds passed
-...41%, 0.16 MB, 1140 KB/s, 0 seconds passed
-...43%, 0.17 MB, 1186 KB/s, 0 seconds passed
-...45%, 0.18 MB, 1239 KB/s, 0 seconds passed
-...47%, 0.19 MB, 1292 KB/s, 0 seconds passed
-...49%, 0.20 MB, 1315 KB/s, 0 seconds passed
-...51%, 0.20 MB, 1367 KB/s, 0 seconds passed
-...53%, 0.21 MB, 1412 KB/s, 0 seconds passed
-...55%, 0.22 MB, 1463 KB/s, 0 seconds passed
-...57%, 0.23 MB, 1511 KB/s, 0 seconds passed
-...59%, 0.23 MB, 1563 KB/s, 0 seconds passed
-...61%, 0.24 MB, 1614 KB/s, 0 seconds passed
-...63%, 0.25 MB, 1665 KB/s, 0 seconds passed
-...65%, 0.26 MB, 1700 KB/s, 0 seconds passed
-...67%, 0.27 MB, 1750 KB/s, 0 seconds passed
-...69%, 0.27 MB, 1801 KB/s, 0 seconds passed
-...71%, 0.28 MB, 1852 KB/s, 0 seconds passed
-...73%, 0.29 MB, 1896 KB/s, 0 seconds passed
-...75%, 0.30 MB, 1946 KB/s, 0 seconds passed
-...77%, 0.30 MB, 1996 KB/s, 0 seconds passed
-...79%, 0.31 MB, 2047 KB/s, 0 seconds passed
-...81%, 0.32 MB, 2087 KB/s, 0 seconds passed
-...83%, 0.33 MB, 2137 KB/s, 0 seconds passed
-...85%, 0.34 MB, 2187 KB/s, 0 seconds passed
-...87%, 0.34 MB, 2237 KB/s, 0 seconds passed
-...89%, 0.35 MB, 2281 KB/s, 0 seconds passed
-...91%, 0.36 MB, 2330 KB/s, 0 seconds passed
-...93%, 0.37 MB, 2337 KB/s, 0 seconds passed
-...95%, 0.38 MB, 2386 KB/s, 0 seconds passed
-...97%, 0.38 MB, 2434 KB/s, 0 seconds passed
-...99%, 0.39 MB, 2483 KB/s, 0 seconds passed
-...100%, 0.40 MB, 2530 KB/s, 0 seconds passed
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre>...1%, 0.01 MB, 444 KB/s, 0 seconds passed
+...3%, 0.02 MB, 864 KB/s, 0 seconds passed
+...5%, 0.02 MB, 1282 KB/s, 0 seconds passed
+...7%, 0.03 MB, 1677 KB/s, 0 seconds passed
+...9%, 0.04 MB, 2034 KB/s, 0 seconds passed
+...11%, 0.05 MB, 2415 KB/s, 0 seconds passed
+...13%, 0.05 MB, 2741 KB/s, 0 seconds passed
+...15%, 0.06 MB, 3074 KB/s, 0 seconds passed
+...17%, 0.07 MB, 3426 KB/s, 0 seconds passed
+...19%, 0.08 MB, 3739 KB/s, 0 seconds passed
+...21%, 0.09 MB, 4036 KB/s, 0 seconds passed
+...23%, 0.09 MB, 4327 KB/s, 0 seconds passed
+...25%, 0.10 MB, 4608 KB/s, 0 seconds passed
+...27%, 0.11 MB, 4839 KB/s, 0 seconds passed
+...29%, 0.12 MB, 5145 KB/s, 0 seconds passed
+...31%, 0.12 MB, 5446 KB/s, 0 seconds passed
+...33%, 0.13 MB, 5729 KB/s, 0 seconds passed
+...35%, 0.14 MB, 5925 KB/s, 0 seconds passed
+...37%, 0.15 MB, 6217 KB/s, 0 seconds passed
+...39%, 0.16 MB, 6449 KB/s, 0 seconds passed
+...41%, 0.16 MB, 6729 KB/s, 0 seconds passed
+...43%, 0.17 MB, 6941 KB/s, 0 seconds passed
+...45%, 0.18 MB, 7146 KB/s, 0 seconds passed
+...47%, 0.19 MB, 7398 KB/s, 0 seconds passed
+...49%, 0.20 MB, 7621 KB/s, 0 seconds passed
+...51%, 0.20 MB, 7858 KB/s, 0 seconds passed
+...53%, 0.21 MB, 8041 KB/s, 0 seconds passed
+...55%, 0.22 MB, 8286 KB/s, 0 seconds passed
+...57%, 0.23 MB, 8538 KB/s, 0 seconds passed
+...59%, 0.23 MB, 8581 KB/s, 0 seconds passed
+...61%, 0.24 MB, 8806 KB/s, 0 seconds passed
+...63%, 0.25 MB, 9045 KB/s, 0 seconds passed
+...65%, 0.26 MB, 9296 KB/s, 0 seconds passed
+...67%, 0.27 MB, 9351 KB/s, 0 seconds passed
+...69%, 0.27 MB, 9589 KB/s, 0 seconds passed
+...71%, 0.28 MB, 9819 KB/s, 0 seconds passed
+...73%, 0.29 MB, 10055 KB/s, 0 seconds passed
+...75%, 0.30 MB, 10184 KB/s, 0 seconds passed
+...77%, 0.30 MB, 10416 KB/s, 0 seconds passed
+...79%, 0.31 MB, 10584 KB/s, 0 seconds passed
+...81%, 0.32 MB, 10814 KB/s, 0 seconds passed
+...83%, 0.33 MB, 10794 KB/s, 0 seconds passed
+...85%, 0.34 MB, 11013 KB/s, 0 seconds passed
+...87%, 0.34 MB, 11200 KB/s, 0 seconds passed
+...89%, 0.35 MB, 11413 KB/s, 0 seconds passed
+...91%, 0.36 MB, 11477 KB/s, 0 seconds passed
+...93%, 0.37 MB, 11677 KB/s, 0 seconds passed
+...95%, 0.38 MB, 11878 KB/s, 0 seconds passed
+...97%, 0.38 MB, 12085 KB/s, 0 seconds passed
+...99%, 0.39 MB, 12269 KB/s, 0 seconds passed
+...100%, 0.40 MB, 12421 KB/s, 0 seconds passed
 Cannot find config for target=cuda -keys=cuda,gpu -max_num_threads=1024 -model=unknown -thread_warp_size=32, workload=(&#39;dense_small_batch.cuda&#39;, (&#39;TENSOR&#39;, (1, 512), &#39;float32&#39;), (&#39;TENSOR&#39;, (1000, 512), &#39;float32&#39;), None, &#39;float32&#39;). A fallback configuration is used, which may bring great performance regression.
 </pre></div>
 </div>
@@ -521,7 +522,7 @@ back in deploy environment.</p>
         <a href="cross_compilation_and_rpc.html" class="btn btn-neutral float-right" title="Cross Compilation and RPC" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right"></span></a>
       
       
-        <a href="../index.html" class="btn btn-neutral float-left" title="Get Started Tutorials" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
+        <a href="tvmc_command_line_driver.html" class="btn btn-neutral float-left" title="Getting Started with TVM command line driver - TVMC" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left"></span> Previous</a>
       
     </div>
   
diff --git a/docs/tutorials/get_started/sg_execution_times.html b/docs/tutorials/get_started/sg_execution_times.html
index 1b3bf91..a981795 100644
--- a/docs/tutorials/get_started/sg_execution_times.html
+++ b/docs/tutorials/get_started/sg_execution_times.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Computation times &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Computation times &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -66,7 +66,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -192,11 +192,12 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-tutorials-get-started-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:16.435</strong> total execution time for <strong>tutorials_get_started</strong> files:</p>
+<p><strong>00:14.841</strong> total execution time for <strong>tutorials_get_started</strong> files:</p>
 <ul class="simple">
-<li><p><strong>00:15.950</strong>: <a class="reference internal" href="relay_quick_start.html#sphx-glr-tutorials-get-started-relay-quick-start-py"><span class="std std-ref">Quick Start Tutorial for Compiling Deep Learning Models</span></a> (<code class="docutils literal notranslate"><span class="pre">relay_quick_start.py</span></code>)</p></li>
-<li><p><strong>00:00.349</strong>: <a class="reference internal" href="tensor_expr_get_started.html#sphx-glr-tutorials-get-started-tensor-expr-get-started-py"><span class="std std-ref">Get Started with Tensor Expression</span></a> (<code class="docutils literal notranslate"><span class="pre">tensor_expr_get_started.py</span></code>)</p></li>
-<li><p><strong>00:00.136</strong>: <a class="reference internal" href="cross_compilation_and_rpc.html#sphx-glr-tutorials-get-started-cross-compilation-and-rpc-py"><span class="std std-ref">Cross Compilation and RPC</span></a> (<code class="docutils literal notranslate"><span class="pre">cross_compilation_and_rpc.py</span></code>)</p></li>
+<li><p><strong>00:14.193</strong>: <a class="reference internal" href="relay_quick_start.html#sphx-glr-tutorials-get-started-relay-quick-start-py"><span class="std std-ref">Quick Start Tutorial for Compiling Deep Learning Models</span></a> (<code class="docutils literal notranslate"><span class="pre">relay_quick_start.py</span></code>)</p></li>
+<li><p><strong>00:00.389</strong>: <a class="reference internal" href="tensor_expr_get_started.html#sphx-glr-tutorials-get-started-tensor-expr-get-started-py"><span class="std std-ref">Get Started with Tensor Expression</span></a> (<code class="docutils literal notranslate"><span class="pre">tensor_expr_get_started.py</span></code>)</p></li>
+<li><p><strong>00:00.173</strong>: <a class="reference internal" href="cross_compilation_and_rpc.html#sphx-glr-tutorials-get-started-cross-compilation-and-rpc-py"><span class="std std-ref">Cross Compilation and RPC</span></a> (<code class="docutils literal notranslate"><span class="pre">cross_compilation_and_rpc.py</span></code>)</p></li>
+<li><p><strong>00:00.087</strong>: <a class="reference internal" href="tvmc_command_line_driver.html#sphx-glr-tutorials-get-started-tvmc-command-line-driver-py"><span class="std std-ref">Getting Started with TVM command line driver - TVMC</span></a> (<code class="docutils literal notranslate"><span class="pre">tvmc_command_line_driver.py</span></code>)</p></li>
 </ul>
 </div>
 
diff --git a/docs/tutorials/get_started/tensor_expr_get_started.html b/docs/tutorials/get_started/tensor_expr_get_started.html
index 7335cee..daf4a07 100644
--- a/docs/tutorials/get_started/tensor_expr_get_started.html
+++ b/docs/tutorials/get_started/tensor_expr_get_started.html
@@ -8,7 +8,7 @@
   
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
   
-  <title>Get Started with Tensor Expression &mdash; tvm 0.7.dev1 documentation</title>
+  <title>Get Started with Tensor Expression &mdash; tvm 0.7.0 documentation</title>
   
 
   
@@ -68,7 +68,7 @@
             
             
               <div class="version">
-                0.7.dev1
+                0.7.0
               </div>
             
           
@@ -102,6 +102,7 @@
 <p class="caption"><span class="caption-text">Tutorials</span></p>
 <ul class="current">
 <li class="toctree-l1 current"><a class="reference internal" href="../index.html">Get Started Tutorials</a><ul class="current">
+<li class="toctree-l2"><a class="reference internal" href="tvmc_command_line_driver.html">Getting Started with TVM command line driver - TVMC</a></li>
... 2317 lines suppressed ...