You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2022/07/18 23:19:09 UTC

[tvm-site] branch asf-site updated: deploying docs (apache/tvm@6bad21e9fe711e6994df238e8a3edc89073b894b)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new c8a4ef05a deploying docs (apache/tvm@6bad21e9fe711e6994df238e8a3edc89073b894b)
c8a4ef05a is described below

commit c8a4ef05aa700910cf541a53adece3f45e1eb3ad
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Mon Jul 18 23:19:02 2022 +0000

    deploying docs (apache/tvm@6bad21e9fe711e6994df238e8a3edc89073b894b)
---
 .../arch/device_target_interactions.rst.txt        |   4 +-
 .../how_to/compile_models/from_darknet.rst.txt     |   2 +-
 .../how_to/compile_models/from_mxnet.rst.txt       |   2 +-
 .../how_to/compile_models/from_oneflow.rst.txt     |   2 +-
 .../how_to/compile_models/from_pytorch.rst.txt     |   2 +-
 .../how_to/compile_models/from_tensorflow.rst.txt  |   2 +-
 .../compile_models/sg_execution_times.rst.txt      |  22 +-
 .../deploy_models/deploy_model_on_android.rst.txt  |   2 +-
 .../deploy_object_detection_pytorch.rst.txt        |   4 +-
 .../deploy_models/deploy_prequantized.rst.txt      |   6 +-
 .../deploy_prequantized_tflite.rst.txt             |   4 +-
 .../how_to/deploy_models/deploy_quantized.rst.txt  |   2 +-
 .../deploy_models/deploy_ssd_gluoncv.rst.txt       |   4 +-
 .../deploy_models/sg_execution_times.rst.txt       |  16 +-
 .../extend_tvm/bring_your_own_datatypes.rst.txt    |   2 +-
 .../how_to/extend_tvm/sg_execution_times.rst.txt   |  10 +-
 .../how_to/extend_tvm/use_pass_instrument.rst.txt  |  16 +-
 .../optimize_operators/opt_conv_cuda.rst.txt       |   2 +-
 .../optimize_operators/opt_conv_tensorcore.rst.txt |   2 +-
 .../how_to/optimize_operators/opt_gemm.rst.txt     |  16 +-
 .../optimize_operators/sg_execution_times.rst.txt  |   8 +-
 .../sg_execution_times.rst.txt                     |  14 +-
 .../tune_conv2d_layer_cuda.rst.txt                 | 523 ++++++++++++++++-----
 .../tune_network_cuda.rst.txt                      |   2 +-
 .../tune_network_x86.rst.txt                       |   4 +-
 .../tune_sparse_x86.rst.txt                        | 132 +++---
 .../tune_with_autotvm/sg_execution_times.rst.txt   |  10 +-
 .../tune_with_autotvm/tune_conv2d_cuda.rst.txt     |  34 +-
 .../work_with_microtvm/micro_autotune.rst.txt      |  16 +-
 .../how_to/work_with_microtvm/micro_train.rst.txt  |  16 +-
 .../work_with_microtvm/sg_execution_times.rst.txt  |   8 +-
 .../work_with_relay/sg_execution_times.rst.txt     |   6 +-
 .../how_to/work_with_schedules/intrin_math.rst.txt |   2 +-
 .../work_with_schedules/sg_execution_times.rst.txt |  16 +-
 .../how_to/work_with_schedules/tensorize.rst.txt   |   2 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |   4 +-
 .../frontend/deploy_classification.rst.txt         |   2 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |   2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |   6 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |   6 +-
 .../topic/vta/tutorials/sg_execution_times.rst.txt |   6 +-
 .../tutorial/auto_scheduler_matmul_x86.rst.txt     |  14 +-
 docs/_sources/tutorial/autotvm_matmul_x86.rst.txt  |  20 +-
 docs/_sources/tutorial/autotvm_relay_x86.rst.txt   |  54 +--
 .../tutorial/cross_compilation_and_rpc.rst.txt     |   2 +-
 docs/_sources/tutorial/intro_topi.rst.txt          |   2 +-
 docs/_sources/tutorial/sg_execution_times.rst.txt  |  20 +-
 .../tutorial/tensor_expr_get_started.rst.txt       |  47 +-
 docs/arch/device_target_interactions.html          |   4 +-
 docs/commit_hash                                   |   2 +-
 docs/how_to/compile_models/from_darknet.html       |   2 +-
 docs/how_to/compile_models/from_mxnet.html         |   2 +-
 docs/how_to/compile_models/from_oneflow.html       |  12 +-
 docs/how_to/compile_models/from_pytorch.html       |   7 +-
 docs/how_to/compile_models/from_tensorflow.html    |   2 +-
 docs/how_to/compile_models/sg_execution_times.html |  30 +-
 .../deploy_models/deploy_model_on_android.html     |   2 +-
 .../deploy_object_detection_pytorch.html           |  17 +-
 docs/how_to/deploy_models/deploy_prequantized.html |   6 +-
 .../deploy_models/deploy_prequantized_tflite.html  |   4 +-
 docs/how_to/deploy_models/deploy_quantized.html    |   2 +-
 docs/how_to/deploy_models/deploy_ssd_gluoncv.html  |  37 +-
 docs/how_to/deploy_models/sg_execution_times.html  |  20 +-
 .../extend_tvm/bring_your_own_datatypes.html       |   2 +-
 docs/how_to/extend_tvm/sg_execution_times.html     |  10 +-
 docs/how_to/extend_tvm/use_pass_instrument.html    |  16 +-
 docs/how_to/optimize_operators/opt_conv_cuda.html  |   2 +-
 .../optimize_operators/opt_conv_tensorcore.html    |   2 +-
 docs/how_to/optimize_operators/opt_gemm.html       |  16 +-
 .../optimize_operators/sg_execution_times.html     |   8 +-
 .../sg_execution_times.html                        |  14 +-
 .../tune_conv2d_layer_cuda.html                    | 523 ++++++++++++++++-----
 .../tune_with_autoscheduler/tune_network_cuda.html |   2 +-
 .../tune_with_autoscheduler/tune_network_x86.html  |   4 +-
 .../tune_with_autoscheduler/tune_sparse_x86.html   | 132 +++---
 .../tune_with_autotvm/sg_execution_times.html      |  10 +-
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.html |  34 +-
 docs/how_to/work_with_microtvm/micro_autotune.html |  16 +-
 docs/how_to/work_with_microtvm/micro_train.html    |  16 +-
 .../work_with_microtvm/sg_execution_times.html     |   8 +-
 .../how_to/work_with_relay/sg_execution_times.html |   6 +-
 docs/how_to/work_with_schedules/intrin_math.html   |   2 +-
 .../work_with_schedules/sg_execution_times.html    |  16 +-
 docs/how_to/work_with_schedules/tensorize.html     |   2 +-
 docs/reference/api/doxygen/classes.html            |  32 +-
 .../classtvm_1_1TargetKindNode-members.html        |  21 +-
 .../api/doxygen/classtvm_1_1TargetKindNode.html    |  23 +-
 .../classtvm_1_1TargetKindNode__coll__graph.svg    |  99 ++--
 .../classtvm_1_1TargetKindNode__inherit__graph.svg |  95 ++--
 .../classtvm_1_1TargetKindRegEntry-members.html    |   3 +-
 .../doxygen/classtvm_1_1TargetKindRegEntry.html    |  39 +-
 ...classtvm_1_1TargetKindRegEntry__coll__graph.svg |  27 +-
 docs/reference/api/doxygen/functions_func_l.html   |   2 +-
 docs/reference/api/doxygen/functions_func_r.html   |   2 +-
 docs/reference/api/doxygen/functions_func_s.html   |  11 +-
 docs/reference/api/doxygen/functions_func_v.html   |   4 +-
 docs/reference/api/doxygen/functions_l.html        |   4 +-
 docs/reference/api/doxygen/functions_r.html        |   2 +-
 docs/reference/api/doxygen/functions_s.html        |  11 +-
 docs/reference/api/doxygen/functions_t.html        |  11 +-
 docs/reference/api/doxygen/functions_v.html        |   8 +-
 docs/reference/api/doxygen/functions_vars_t.html   |   3 +
 docs/reference/api/doxygen/hierarchy.html          |  87 ++--
 docs/reference/api/doxygen/inherit_graph_10.svg    |  16 +-
 docs/reference/api/doxygen/inherit_graph_108.svg   |  32 +-
 docs/reference/api/doxygen/inherit_graph_163.svg   |  19 +-
 docs/reference/api/doxygen/inherit_graph_164.svg   |  19 +-
 docs/reference/api/doxygen/inherit_graph_165.svg   |  18 +-
 docs/reference/api/doxygen/inherit_graph_166.svg   |  21 +-
 docs/reference/api/doxygen/inherit_graph_167.svg   |  24 +-
 docs/reference/api/doxygen/inherit_graph_168.svg   |  21 +-
 docs/reference/api/doxygen/inherit_graph_169.svg   |  18 +-
 docs/reference/api/doxygen/inherit_graph_170.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_171.svg   |  18 +-
 docs/reference/api/doxygen/inherit_graph_172.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_173.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_174.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_175.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_176.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_177.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_178.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_179.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_180.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_181.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_182.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_183.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_184.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_185.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_186.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_187.svg   |  14 +-
 docs/reference/api/doxygen/inherit_graph_188.svg   |  14 +-
 docs/reference/api/doxygen/inherit_graph_189.svg   |  28 +-
 docs/reference/api/doxygen/inherit_graph_190.svg   |  29 +-
 docs/reference/api/doxygen/inherit_graph_191.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_192.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_193.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_194.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_195.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_196.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_197.svg   |  17 +-
 docs/reference/api/doxygen/inherit_graph_198.svg   |  17 +-
 docs/reference/api/doxygen/inherit_graph_199.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_200.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_201.svg   |  14 +-
 docs/reference/api/doxygen/inherit_graph_202.svg   |  17 +-
 docs/reference/api/doxygen/inherit_graph_203.svg   |  80 +---
 docs/reference/api/doxygen/inherit_graph_204.svg   |  70 +--
 docs/reference/api/doxygen/inherit_graph_205.svg   |  79 +++-
 docs/reference/api/doxygen/inherit_graph_206.svg   |  19 +-
 docs/reference/api/doxygen/inherit_graph_207.svg   |  19 +-
 docs/reference/api/doxygen/inherit_graph_208.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_209.svg   |  15 +-
 docs/reference/api/doxygen/inherit_graph_210.svg   |  29 +-
 docs/reference/api/doxygen/inherit_graph_211.svg   |  24 +-
 docs/reference/api/doxygen/inherit_graph_212.svg   |  30 +-
 docs/reference/api/doxygen/inherit_graph_213.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_214.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_215.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_216.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_217.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_218.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_219.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_220.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_221.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_222.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_223.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_224.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_225.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_226.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_227.svg   |  12 +-
 docs/reference/api/doxygen/inherit_graph_228.svg   |  12 +-
 ...inherit_graph_228.svg => inherit_graph_229.svg} |   0
 docs/reference/api/doxygen/inherit_graph_39.svg    |  16 +-
 docs/reference/api/doxygen/inherit_graph_42.svg    |   8 +-
 docs/reference/api/doxygen/inherit_graph_43.svg    |   8 +-
 docs/reference/api/doxygen/inherits.html           | 134 +++---
 docs/reference/api/doxygen/namespacemembers_f.html |   3 +
 .../api/doxygen/namespacemembers_func_p.html       |   6 +-
 .../api/doxygen/namespacemembers_func_s.html       |   6 +-
 docs/reference/api/doxygen/namespacemembers_p.html |   6 +-
 docs/reference/api/doxygen/namespacemembers_s.html |   6 +-
 docs/reference/api/doxygen/namespacemembers_t.html |   3 +
 .../api/doxygen/namespacemembers_type.html         |   6 +
 docs/reference/api/doxygen/namespacetvm.html       |  42 ++
 docs/reference/api/doxygen/search/all_10.js        |   2 +-
 docs/reference/api/doxygen/search/all_11.js        |   4 +-
 docs/reference/api/doxygen/search/all_13.js        |   6 +-
 docs/reference/api/doxygen/search/all_14.js        |  19 +-
 docs/reference/api/doxygen/search/all_15.js        |  11 +-
 docs/reference/api/doxygen/search/all_16.js        |   2 +-
 docs/reference/api/doxygen/search/all_17.js        |   4 +-
 docs/reference/api/doxygen/search/all_7.js         |   1 +
 docs/reference/api/doxygen/search/all_9.js         |   2 +-
 docs/reference/api/doxygen/search/all_d.js         |   2 +-
 docs/reference/api/doxygen/search/all_e.js         |   2 +-
 docs/reference/api/doxygen/search/classes_10.js    |   6 +-
 docs/reference/api/doxygen/search/classes_11.js    |   5 +-
 docs/reference/api/doxygen/search/classes_13.js    |   2 +-
 docs/reference/api/doxygen/search/classes_7.js     |   2 +-
 docs/reference/api/doxygen/search/classes_a.js     |   2 +-
 docs/reference/api/doxygen/search/functions_10.js  |   4 +-
 docs/reference/api/doxygen/search/functions_12.js  |   4 +-
 docs/reference/api/doxygen/search/functions_13.js  |   7 +-
 docs/reference/api/doxygen/search/functions_15.js  |   2 +-
 docs/reference/api/doxygen/search/functions_16.js  |   2 +-
 docs/reference/api/doxygen/search/functions_c.js   |   2 +-
 docs/reference/api/doxygen/search/typedefs_5.js    |   1 +
 docs/reference/api/doxygen/search/typedefs_e.js    |   1 +
 docs/reference/api/doxygen/search/variables_12.js  |   1 +
 docs/reference/api/doxygen/target_8h_source.html   |   2 +-
 docs/reference/api/doxygen/target__kind_8h.html    |  13 +-
 .../api/doxygen/target__kind_8h_source.html        |  48 +-
 docs/reference/api/python/auto_scheduler.html      |   4 +-
 .../api/typedoc/classes/bytestreamreader.html      |  12 +-
 .../api/typedoc/classes/cachedcallstack.html       |  34 +-
 docs/reference/api/typedoc/classes/dldatatype.html |  12 +-
 docs/reference/api/typedoc/classes/dldevice.html   |  10 +-
 .../reference/api/typedoc/classes/environment.html |  12 +-
 docs/reference/api/typedoc/classes/ffilibrary.html |  20 +-
 .../api/typedoc/classes/graphexecutor.html         |  16 +-
 docs/reference/api/typedoc/classes/instance.html   |  40 +-
 docs/reference/api/typedoc/classes/memory.html     |  34 +-
 docs/reference/api/typedoc/classes/module.html     |  10 +-
 docs/reference/api/typedoc/classes/ndarray.html    |  22 +-
 .../api/typedoc/classes/packedfunccell.html        |   6 +-
 docs/reference/api/typedoc/classes/rpcserver.html  |  14 +-
 docs/reference/api/typedoc/classes/scalar.html     |   6 +-
 .../api/typedoc/classes/webgpucontext.html         |  12 +-
 docs/reference/api/typedoc/enums/argtypecode.html  |  30 +-
 .../api/typedoc/enums/aynccallbackcode.html        |   4 +-
 .../api/typedoc/enums/dldatatypecode.html          |   8 +-
 .../api/typedoc/enums/rpcserverstate.html          |  12 +-
 docs/reference/api/typedoc/enums/sizeof.html       |  18 +-
 docs/reference/api/typedoc/index.html              | 112 ++---
 .../api/typedoc/interfaces/disposable.html         |   2 +-
 .../api/typedoc/interfaces/functioninfo.html       |   6 +-
 .../api/typedoc/interfaces/libraryprovider.html    |   4 +-
 docs/searchindex.js                                |   2 +-
 .../vta/tutorials/autotvm/sg_execution_times.html  |   4 +-
 .../tutorials/frontend/deploy_classification.html  |   2 +-
 .../vta/tutorials/frontend/deploy_detection.html   |   2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |   6 +-
 .../vta/tutorials/optimize/sg_execution_times.html |   6 +-
 docs/topic/vta/tutorials/sg_execution_times.html   |   6 +-
 docs/tutorial/auto_scheduler_matmul_x86.html       |   6 +-
 docs/tutorial/autotvm_matmul_x86.html              |  20 +-
 docs/tutorial/autotvm_relay_x86.html               | 258 +++++-----
 docs/tutorial/cross_compilation_and_rpc.html       |   2 +-
 docs/tutorial/intro_topi.html                      |   2 +-
 docs/tutorial/sg_execution_times.html              |  24 +-
 docs/tutorial/tensor_expr_get_started.html         |  43 +-
 251 files changed, 2883 insertions(+), 2180 deletions(-)

diff --git a/docs/_sources/arch/device_target_interactions.rst.txt b/docs/_sources/arch/device_target_interactions.rst.txt
index 9c391d31b..ec8d52226 100644
--- a/docs/_sources/arch/device_target_interactions.rst.txt
+++ b/docs/_sources/arch/device_target_interactions.rst.txt
@@ -194,8 +194,8 @@ different code generation targets can run on the same physical device.
 device type.)
 
 All options for a specific target kind are added with the
-``add_attr_option`` function, with optional default values.  A
-preprocessor can be added with ``set_attrs_preprocessor`` to define
+``add_attr_option`` function, with optional default values.  A `Target`
+parser can be added with ``set_target_parser`` to process
 any parameters that are dynamically based on other parameters or
 queried from device properties.
 
diff --git a/docs/_sources/how_to/compile_models/from_darknet.rst.txt b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
index 1d8fd7df6..8785da53c 100644
--- a/docs/_sources/how_to/compile_models/from_darknet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
@@ -317,7 +317,7 @@ The process is no different from other examples.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  1.615 seconds)
+   **Total running time of the script:** ( 1 minutes  2.060 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_darknet.py:
diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index 4dd2320b5..0b8a4efb6 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -115,7 +115,7 @@ In this section, we download a pretrained imagenet model and classify an image.
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip6f6e2b1b-6c0c-4741-84cf-56756c687355 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipd556f310-f866-47c2-99dd-fcd8f5721d18 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
     x (1, 3, 224, 224)
 
 
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index 7010d8cbf..748e327f3 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -113,7 +113,7 @@ Load a pretrained OneFlow model and save model
  .. code-block:: none
 
     Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     13%|#2        | 5.19M/41.5M [00:00<00:01, 37.3MB/s]
     24%|##4       | 10.1M/41.5M [00:00<00:00, 43.3MB/s]
     56%|#####5    | 23.2M/41.5M [00:00<00:00, 81.6MB/s]
     76%|#######5  | 31.4M/41.5M [00:00<00:00, 74.9MB/s]
    100%|##########| 41.5M/41.5M [00:00<00:00, 76.4MB/s]
+
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     15%|#5        | 6.33M/41.5M [00:00<00:00, 61.3MB/s]
     29%|##9       | 12.2M/41.5M [00:00<00:00, 51.9MB/s]
     41%|####1     | 17.2M/41.5M [00:00<00:00, 32.5MB/s]
     58%|#####7    | 24.0M/41.5M [00:00<00:00, 35.0MB/s]
     82%|########2 | 34.1M/41.5M [00:00<00:00, 51.7MB/s]
     96%|#########6| 40.0M/41.5M [00:00<00:00, 53.2MB/s]
    100%|##########| 41.5M/41.5M [00:00<00:00, 48.9MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index 0cea40b6b..6b44d173e 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -94,7 +94,7 @@ Load a pretrained PyTorch model
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     39%|###8      | 17.4M/44.7M [00:00<00:00, 182MB/s]
     91%|######### | 40.5M/44.7M [00:00<00:00, 217MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 217MB/s]
+
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     11%|#         | 4.89M/44.7M [00:00<00:00, 51.3MB/s]
     22%|##1       | 9.78M/44.7M [00:00<00:00, 48.3MB/s]
     75%|#######5  | 33.5M/44.7M [00:00<00:00, 137MB/s] 
    100%|##########| 44.7M/44.7M [00:00<00:00, 132MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 3b7c0ca26..62cdc38bf 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -423,7 +423,7 @@ Run the corresponding model on tensorflow
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  3.149 seconds)
+   **Total running time of the script:** ( 1 minutes  1.017 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index db1bece00..ca3abe437 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**05:02.908** total execution time for **how_to_compile_models** files:
+**04:59.918** total execution time for **how_to_compile_models** files:
 
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:03.149 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:02.060 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:01.615 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:01.017 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:39.676 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:38.176 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:27.293 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:27.033 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:24.874 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:26.167 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:24.873 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:23.911 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:24.266 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:23.600 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:19.690 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:20.890 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:15.150 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:14.675 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.321 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.389 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index 5a1fb1122..cfcb026df 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -441,7 +441,7 @@ Execute on TVM
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      16.2943      16.2940      16.5826      16.1340       0.1329   
+      15.5571      15.5695      15.7189      15.3987       0.0868   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index 467806e48..24a131592 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -123,7 +123,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
      0%|          | 0.00/170M [00:00<?, ?B/s]
     10%|#         | 17.4M/170M [00:00<00:00, 182MB/s]
     25%|##5       | 42.9M/170M [00:00<00:00, 232MB/s]
     42%|####2     | 71.8M/170M [00:00<00:00, 264MB/s]
     59%|#####9    | 100M/170M [00:00<00:00, 278MB/s] 
     75%|#######4  | 127M/170M [00:00<00:00, 261MB/s]
     89%|########9 | 152M/170M [00:00<00:00, 257MB/s]
    100%|##########| 170M/170M [00:00<00:00, 254MB/s]
+
      0%|          | 0.00/170M [00:00<?, ?B/s]
     11%|#1        | 19.2M/170M [00:00<00:00, 202MB/s]
     25%|##5       | 43.0M/170M [00:00<00:00, 230MB/s]
     41%|####      | 69.5M/170M [00:00<00:00, 252MB/s]
     56%|#####6    | 95.5M/170M [00:00<00:00, 260MB/s]
     71%|#######   | 120M/170M [00:00<00:00, 255MB/s] 
     85%|########5 | 145M/170M [00:00<00:00, 252MB/s]
     99%|#########9| 169M/170M [00:00<00:00, 248MB/s]
    100%|##########| 170M/170M [00:00<00:00, 248MB/s]
     /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
       for i in range(dim)
     /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -292,7 +292,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  4.193 seconds)
+   **Total running time of the script:** ( 2 minutes  55.583 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index 46ff4588c..6d61ffec4 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -232,7 +232,7 @@ training. Other models require a full post training calibration.
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 182MB/s]
+
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 145MB/s]
 
 
 
@@ -412,7 +412,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      90.4726      90.3123      98.9719      90.1457       0.8869   
+      90.3941      90.2763      96.8060      90.0492       0.7160   
                
 
 
@@ -461,7 +461,7 @@ TODO
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  10.526 seconds)
+   **Total running time of the script:** ( 1 minutes  8.056 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index 4311e5438..8e9f73cb1 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -439,7 +439,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      120.6853     120.6432     122.8147     119.8759      0.4554   
+      120.4923     120.0808     146.6039     119.2306      2.9008   
                
 
 
@@ -476,7 +476,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  52.888 seconds)
+   **Total running time of the script:** ( 1 minutes  51.317 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index c197f81ae..d8da5aef4 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -255,7 +255,7 @@ We create a Relay VM to build and execute the model.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  7.718 seconds)
+   **Total running time of the script:** ( 1 minutes  21.420 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index 943786f4d..75af78454 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -158,7 +158,7 @@ Convert and compile model for CPU.
             data: None
       input_sym_arg_type = in_param.infer_type()[0]
     Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|4         | 5309/132723 [00:00<00:02, 50874.49KB/s]
     10%|9         | 13108/132723 [00:00<00:01, 66534.77KB/s]
     16%|#5        | 20979/132723 [00:00<00:01, 72038.19KB/s]
     22%|##1       | 29043/132723 [00:00<00:01, 75411.18KB/s]
     28%|##7       | 37032/132723 [00:00<00:01, 77016.96KB/s]
     34%|###3      | 45005/132723 [00:00<00:01, 77933.04KB/s]
     40%|###9      | 52806/132723 [00:00<00:01, 77914.47KB/s]
     46%|####5     | 60762/132723 [00:00<00:00, 78430.57KB/s]
     52%|#####1    | 68609/132723 [00:00<00:00, 77782.41KB/s]
     58%|#####7    | 76391/132723 [00:01<00:00, 77682.92KB/s]
     63%|######3   | 84162/132723 [00:01<00:00, 76956.61KB/s]
     69%|######9   | 91861/132723 [00:01<00:00, 76477.86KB/s]
     75%|#######4  | 99511/132723 [00:01<00:00, 76299.51KB/s]
     81%|########  | 107143/132723 [00:01<00:00, 76208.23KB/s]
     86%|########6 | 114794/132723 [00:01<00:00, 76284.47KB/s]
     92%|#########
 2| 122426/132723 [00:01<00:00, 76257.85KB/s]
     98%|#########8| 130171/132723 [00:01<00:00, 76613.18KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 75958.90KB/s]
+
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|4         | 5880/132723 [00:00<00:02, 58791.79KB/s]
     11%|#         | 14003/132723 [00:00<00:01, 71987.49KB/s]
     17%|#6        | 22075/132723 [00:00<00:01, 75972.62KB/s]
     23%|##2       | 30136/132723 [00:00<00:01, 77800.13KB/s]
     29%|##8       | 38351/132723 [00:00<00:01, 79365.25KB/s]
     35%|###5      | 46586/132723 [00:00<00:01, 80377.94KB/s]
     41%|####1     | 54807/132723 [00:00<00:00, 80974.29KB/s]
     47%|####7     | 62905/132723 [00:00<00:00, 80499.15KB/s]
     53%|#####3    | 70956/132723 [00:00<00:00, 80138.98KB/s]
     60%|#####9    | 78973/132723 [00:01<00:00, 80145.06KB/s]
     66%|######5   | 86988/132723 [00:01<00:00, 79612.47KB/s]
     72%|#######1  | 94951/132723 [00:01<00:00, 79317.07KB/s]
     78%|#######7  | 102884/132723 [00:01<00:00, 79207.81KB/s]
     83%|########3 | 110806/132723 [00:01<00:00, 79166.01KB/s]
     89%|########9 | 118761/132723 [00:01<00:00, 79277.74KB/s]
     95%|########
 #5| 126731/132723 [00:01<00:00, 79402.43KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 78885.78KB/s]
 
 
 
@@ -241,7 +241,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  36.725 seconds)
+   **Total running time of the script:** ( 2 minutes  29.597 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index 50c6bc419..232eb55c9 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,22 +5,22 @@
 
 Computation times
 =================
-**10:45.635** total execution time for **how_to_deploy_models** files:
+**10:37.340** total execution time for **how_to_deploy_models** files:
 
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:04.193 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 02:55.583 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 02:36.725 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 02:29.597 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 01:52.888 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 01:51.317 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:10.526 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:21.420 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:07.718 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:08.056 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:30.666 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:29.064 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:22.913 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:22.297 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)                                     | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index 47724953b..446b1fb48 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -476,7 +476,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipd459995e-b6f1-4d10-b361-e0501c591274 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip1a596816-b990-4e41-b512-bf8d7b091a2a from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 
 
 
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index c28227e91..0a93d18f4 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:41.777** total execution time for **how_to_extend_tvm** files:
+**00:40.193** total execution time for **how_to_extend_tvm** files:
 
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:38.544 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:37.083 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.264 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.185 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:00.961 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:00.914 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.008 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.010 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index 9097ddc30..f9c031235 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -216,10 +216,10 @@ profile the execution time of each passes.
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6842us [6842us] (46.48%; 46.48%)
-    FoldScaleAxis: 7878us [5us] (53.52%; 53.52%)
-            FoldConstant: 7872us [1610us] (53.48%; 99.93%)
-                    InferType: 6262us [6262us] (42.55%; 79.55%)
+    InferType: 6613us [6613us] (45.61%; 45.61%)
+    FoldScaleAxis: 7886us [5us] (54.39%; 54.39%)
+            FoldConstant: 7881us [1577us] (54.35%; 99.93%)
+                    InferType: 6303us [6303us] (43.47%; 79.98%)
 
 
 
@@ -258,10 +258,10 @@ Refer to following sections and :py:func:`tvm.instrument.pass_instrument` for th
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6421us [6421us] (44.65%; 44.65%)
-    FoldScaleAxis: 7959us [5us] (55.35%; 55.35%)
-            FoldConstant: 7954us [1672us] (55.31%; 99.93%)
-                    InferType: 6282us [6282us] (43.68%; 78.98%)
+    InferType: 6220us [6220us] (44.68%; 44.68%)
+    FoldScaleAxis: 7700us [5us] (55.32%; 55.32%)
+            FoldConstant: 7695us [1596us] (55.28%; 99.94%)
+                    InferType: 6099us [6099us] (43.81%; 79.26%)
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index c430d7f99..52cca29e5 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -340,7 +340,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 54.188965 ms
+    Convolution: 34.777033 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index f33b03b08..78f4ae290 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -671,7 +671,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 6.942971 ms
+    conv2d with tensor core: 12.743089 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index b73efd687..198d65d8f 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -143,8 +143,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.019226
-    Baseline: 3.450500
+    Numpy running time: 0.018402
+    Baseline: 3.209169
 
 
 
@@ -239,7 +239,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.309829
+    Opt1: 0.290653
 
 
 
@@ -342,7 +342,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.346741
+    Opt2: 0.331112
 
 
 
@@ -438,7 +438,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.121748
+    Opt3: 0.117239
 
 
 
@@ -563,7 +563,7 @@ flattening.
 
  .. code-block:: none
 
-    Opt4: 0.110637
+    Opt4: 0.111911
 
 
 
@@ -685,7 +685,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.111510
+    Opt5: 0.111434
 
 
 
@@ -810,7 +810,7 @@ Futhermore, we can also utilize multi-core processors to do the thread-level par
 
  .. code-block:: none
 
-    Opt6: 0.145426
+    Opt6: 0.144853
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index 94b01eaf6..433a28c68 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:34.997** total execution time for **how_to_optimize_operators** files:
+**00:33.786** total execution time for **how_to_optimize_operators** files:
 
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:32.679 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:31.406 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.271 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.348 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.046 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.032 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index c246d3bcc..1ee882654 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,18 +5,18 @@
 
 Computation times
 =================
-**06:14.584** total execution time for **how_to_tune_with_autoscheduler** files:
+**05:56.544** total execution time for **how_to_tune_with_autoscheduler** files:
 
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 03:27.225 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 03:14.110 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:23.284 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:21.555 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 00:46.637 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 00:45.380 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:19.310 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:18.189 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:09.166 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:08.738 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:08.963 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:08.571 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index 68dfd3466..9931ac18b 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -241,76 +241,222 @@ cooperative fetching, unrolling and operator fusion.
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
       preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
       attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 32;
-      allocate(conv2d_nchw: Pointer(local float32), float32, [7]), storage_scope = local;
-      allocate(pad_temp.shared: Pointer(shared float32), float32, [648]), storage_scope = shared;
-      allocate(kernel.shared: Pointer(shared float32), float32, [1152]), storage_scope = shared;
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112 {
-        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [7], [], scope="local", align=16)[0] = 0f32
+      allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
+      allocate(pad_temp.shared: Pointer(shared float32), float32, [2016]), storage_scope = shared;
+      allocate(kernel.shared: Pointer(shared float32), float32, [1536]), storage_scope = shared;
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
+        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope="local", align=32)[0] = 0f32
+        conv2d_nchw_1[7] = 0f32
         conv2d_nchw_1[1] = 0f32
+        conv2d_nchw_1[8] = 0f32
         conv2d_nchw_1[2] = 0f32
+        conv2d_nchw_1[9] = 0f32
         conv2d_nchw_1[3] = 0f32
+        conv2d_nchw_1[10] = 0f32
         conv2d_nchw_1[4] = 0f32
+        conv2d_nchw_1[11] = 0f32
         conv2d_nchw_1[5] = 0f32
+        conv2d_nchw_1[12] = 0f32
         conv2d_nchw_1[6] = 0f32
-        for (rc.outer.outer: int32, 0, 64) {
-          let cse_var_2: int32 = (rc.outer.outer*392)
-          let cse_var_1: int32 = (rc.outer.outer*72)
-           {
-            attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            pad_temp.shared_1: Buffer(pad_temp.shared, float32, [648], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else(((((9 <= floormod(threadIdx.x_1, 81)) && (floormod(threadIdx.x_1, 81) < 72)) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 81)*49)) + (floordiv(floormod(threadIdx.x_1, 81), 9)*7)) + floormod(threadIdx.x_1, 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            pad_temp.shared_1[(threadIdx.x_1 + 112)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1 + 31), 81)) && (floormod((threadIdx.x_1 + 31), 81) < 72)) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data[((((cse_var_2 + (floordiv((threadIdx.x_1 + 112), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 31), 81), 9)*7)) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            pad_temp.shared_1[(threadIdx.x_1 + 224)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1 + 62), 81)) && (floormod((threadIdx.x_1 + 62), 81) < 72)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data[((((cse_var_2 + (floordiv((threadIdx.x_1 + 224), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 62), 81), 9)*7)) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            pad_temp.shared_1[(threadIdx.x_1 + 336)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1 + 12), 81)) && (floormod((threadIdx.x_1 + 12), 81) < 72)) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data[((((cse_var_2 + (floordiv((threadIdx.x_1 + 336), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 12), 81), 9)*7)) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            pad_temp.shared_1[(threadIdx.x_1 + 448)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1 + 43), 81)) && (floormod((threadIdx.x_1 + 43), 81) < 72)) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data[((((cse_var_2 + (floordiv((threadIdx.x_1 + 448), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 43), 81), 9)*7)) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            if @tir.likely((threadIdx.x_1 < 88), dtype=bool) {
-              pad_temp.shared_1[(threadIdx.x_1 + 560)] = @tir.if_then_else(((((9 <= floormod((threadIdx.x_1 + 74), 81)) && (floormod((threadIdx.x_1 + 74), 81) < 72)) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data[((((cse_var_2 + (floordiv((threadIdx.x_1 + 560), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 74), 81), 9)*7)) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-            }
-            attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            kernel.shared_1: Buffer(kernel.shared, float32, [1152], [], scope="shared")[threadIdx.x_2] = kernel[((((blockIdx.x*73728) + (floordiv(threadIdx.x_2, 72)*4608)) + cse_var_1) + floormod(threadIdx.x_2, 72))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 112), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 40), 72), 9)*9)) + (floordiv(floormod((threadIdx.x_2 + 4), 9), 3)*3)) + floormod((threadIdx.x_2 + 1), 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 224), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 8), 72), 3)*3)) + floormod((threadIdx.x_2 + 2), 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 336), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 48), 72), 9)*9)) + (floormod((floordiv(threadIdx.x_2, 3) + 1), 3)*3)) + floormod(threadIdx.x_2, 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 448), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 16), 72), 9)*9)) + (floordiv(floormod((threadIdx.x_2 + 7), 9), 3)*3)) + floormod((threadIdx.x_2 + 1), 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            kernel.shared_1[(threadIdx.x_2 + 560)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 560), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 56), 72), 9)*9)) + floormod((threadIdx.x_2 + 2), 9))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            kernel.shared_1[(threadIdx.x_2 + 672)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 672), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 24), 72), 9)*9)) + (floormod((floordiv(threadIdx.x_2, 3) + 2), 3)*3)) + floormod(threadIdx.x_2, 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            kernel.shared_1[(threadIdx.x_2 + 784)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 784), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 64), 72), 9)*9)) + floormod((threadIdx.x_2 + 1), 9))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 896), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 32), 72), 9)*9)) + (floordiv(floormod((threadIdx.x_2 + 5), 9), 3)*3)) + floormod((threadIdx.x_2 + 2), 3))]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            kernel.shared_1[(threadIdx.x_2 + 1008)] = kernel[(((((blockIdx.x*73728) + (floordiv(threadIdx.x_2, 72)*4608)) + cse_var_1) + floormod(threadIdx.x_2, 72)) + 64512)]
-            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 112;
-            if @tir.likely((threadIdx.x_2 < 32), dtype=bool) {
-              kernel.shared_1[(threadIdx.x_2 + 1120)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1120), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 40), 72), 9)*9)) + (floordiv(floormod((threadIdx.x_2 + 4), 9), 3)*3)) + floormod((threadIdx.x_2 + 1), 3))]
-            }
-            for (rx.outer.inner: int32, 0, 3) {
-              for (rc.inner: int32, 0, 8) {
-                for (ry.inner: int32, 0, 3) {
-                  conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7))]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-                  conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 9)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-                  conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 18)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-                  conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 27)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-                  conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 36)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-                  conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 45)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-                  conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 54)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-                }
+        conv2d_nchw_1[13] = 0f32
+        for (rc.outer.outer: int32, 0, 16) {
+          for (rx.outer.outer: int32, 0, 3) {
+            let cse_var_2: int32 = (rc.outer.outer*1568)
+            let cse_var_1: int32 = (rc.outer.outer*288)
+             {
+              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
+                pad_temp.shared_1: Buffer(pad_temp.shared, float32, [2016], [], scope="shared")[((floordiv((threadIdx.x_1*12), 7)*7) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((7 <= floormod((threadIdx.x_1*12), 63)) && (floormod((threadIdx.x_1*12), 63) < 56)) && (1 <= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) && ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) < 8)), data[(((((cse_var_2 + (floordiv((threadIdx.x_1*12), 63)*49)) + (floordiv(floormod((threadIdx.x_ [...]
+                pad_temp.shared_1[(((floordiv((threadIdx.x_1*4), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 1), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((7 <= floormod(((threadIdx.x_1*12) + 1), 63)) && (floormod(((threadIdx.x_1*12) + 1), 63) < 56)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)) < 8)), data[(((((cse_var_2 + (floordiv((threadIdx.x_1*4), 21)*49)) + (flo [...]
+                pad_temp.shared_1[(((floordiv((threadIdx.x_1*4), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 2), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((7 <= floormod(((threadIdx.x_1*12) + 2), 63)) && (floormod(((threadIdx.x_1*12) + 2), 63) < 56)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)) < 8)), data[(((((cse_var_2 + (floordiv((threadIdx.x_1*4), 21)*49)) + (flo [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 1), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 3), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((7 <= floormod(((threadIdx.x_1*12) + 3), 63)) && (floormod(((threadIdx.x_1*12) + 3), 63) < 56)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 1), 21) [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 1), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 4), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((7 <= floormod(((threadIdx.x_1*12) + 4), 63)) && (floormod(((threadIdx.x_1*12) + 4), 63) < 56)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 1), 21) [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 1), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 5), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 5), 7))] = @tir.if_then_else(((((7 <= floormod(((threadIdx.x_1*12) + 5), 63)) && (floormod(((threadIdx.x_1*12) + 5), 63) < 56)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 5), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 5), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 1), 21) [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 2), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 6), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 6), 7))] = @tir.if_then_else(((((7 <= floormod(((threadIdx.x_1*12) + 6), 63)) && (floormod(((threadIdx.x_1*12) + 6), 63) < 56)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 6), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 6), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 2), 21) [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 2), 21)*63) + (floormod((floordiv((threadIdx.x_1*12), 7) + 1), 9)*7)) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv((threadIdx.x_1*12), 7) + 1), 9)) && (floormod((floordiv((threadIdx.x_1*12), 7) + 1), 9) < 8)) && (1 <= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) && ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 2), [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 2), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 8), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((7 <= floormod(((threadIdx.x_1*12) + 8), 63)) && (floormod(((threadIdx.x_1*12) + 8), 63) < 56)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 2), 21) [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 3), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 9), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((7 <= floormod(((threadIdx.x_1*12) + 9), 63)) && (floormod(((threadIdx.x_1*12) + 9), 63) < 56)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 3), 21) [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 3), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 10), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((7 <= floormod(((threadIdx.x_1*12) + 10), 63)) && (floormod(((threadIdx.x_1*12) + 10), 63) < 56)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 3),  [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 3), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 11), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((7 <= floormod(((threadIdx.x_1*12) + 11), 63)) && (floormod(((threadIdx.x_1*12) + 11), 63) < 56)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 3),  [...]
+              }
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*12) + 672), 63)*63) + (floormod((floordiv((threadIdx.x_1*12), 7) + 6), 9)*7)) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv((threadIdx.x_1*12), 7) + 6), 9)) && (floormod((floordiv((threadIdx.x_1*12), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) && ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*12) + [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 224), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 6), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 224), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 6), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 6), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 6), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 5), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 6), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 5), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 5), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 6), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 6), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 6), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 6), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 21)*63) + (floormod((floordiv((threadIdx.x_1*12), 7) + 7), 9)*7)) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv((threadIdx.x_1*12), 7) + 7), 9)) && (floormod((floordiv((threadIdx.x_1*12), 7) + 7), 9) < 8)) && (1 <= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) && ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 2 [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 6), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 6), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 6), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)) < 8)), data[(((((cse_v [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 6), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 6), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)) < 8)), data[(((((cse_v [...]
+              }
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*12) + 1344), 63)*63) + (floormod((floordiv((threadIdx.x_1*12), 7) + 3), 9)*7)) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv((threadIdx.x_1*12), 7) + 3), 9)) && (floormod((floordiv((threadIdx.x_1*12), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) && ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*12)  [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 448), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 3), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 448), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 3), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 449), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 3), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 449), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 3), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 449), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 5), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 3), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 5), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 5), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 450), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 6), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 3), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 6), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 6), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 450), 21)*63) + (floormod((floordiv((threadIdx.x_1*12), 7) + 4), 9)*7)) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv((threadIdx.x_1*12), 7) + 4), 9)) && (floormod((floordiv((threadIdx.x_1*12), 7) + 4), 9) < 8)) && (1 <= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) && ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) < 8)), data[(((((cse_var_2 + (floordiv(((threadIdx.x_1*4) + 4 [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 450), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 3), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 451), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 3), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)) < 8)), data[(((((cse_var_ [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 451), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 3), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)) < 8)), data[(((((cse_v [...]
+                pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 451), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((1 <= floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 3), 9)) && (floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 3), 9) < 8)) && (1 <= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) && ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)) < 8)), data[(((((cse_v [...]
+              }
+              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1: Buffer(kernel.shared, float32, [1536], [], scope="shared")[threadIdx.x_2] = kernel[((((blockIdx.x*73728) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 56)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 56), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 56), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 112), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 16), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 168)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 168), 96)*4608)) + cse_var_1) + (floormod((floordiv(threadIdx.x_2, 3) + 24), 32)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 224), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 32), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 280)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 280), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 88), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 336), 96)*4608)) + cse_var_1) + (floormod((floordiv(threadIdx.x_2, 3) + 16), 32)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 392), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 8), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 448), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 64), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 504)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 504), 96)*4608)) + cse_var_1) + ((floordiv(threadIdx.x_2, 3) + 8)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 560)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 560), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 80), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 616)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 616), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 40), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 672)] = kernel[(((((blockIdx.x*73728) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer) + 32256)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 728)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 728), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 56), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 784)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 784), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 16), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 840)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 840), 96)*4608)) + cse_var_1) + (floormod((floordiv(threadIdx.x_2, 3) + 24), 32)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 896), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 32), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 952)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 952), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 88), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 1008)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1008), 96)*4608)) + cse_var_1) + (floormod((floordiv(threadIdx.x_2, 3) + 16), 32)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 1064)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1064), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 8), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 1120)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1120), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 64), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 1176)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1176), 96)*4608)) + cse_var_1) + ((floordiv(threadIdx.x_2, 3) + 8)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 1232)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1232), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 80), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 1288)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1288), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 40), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((blockIdx.x*73728) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer) + 64512)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 1400)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1400), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 56), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 1456)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1456), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 16), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              if @tir.likely((threadIdx.x_2 < 24), dtype=bool) {
+                kernel.shared_1[(threadIdx.x_2 + 1512)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1512), 96)*4608)) + cse_var_1) + ((floordiv(threadIdx.x_2, 3) + 24)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+              }
+              for (rc.outer.inner: int32, 0, 16) {
+                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7))]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+                conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 63)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+                conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 63)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 1)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+                conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 64)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+                conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 64)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 2)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+                conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 65)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+                conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 65)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 3)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+                conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 66)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+                conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 66)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 4)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+                conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 67)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+                conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 67)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 5)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+                conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 68)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+                conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 68)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 6)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+                conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 69)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+                conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 69)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+                conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 70)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+                conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 70)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+                conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 71)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+                conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 71)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 9)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+                conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 9)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 72)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+                conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 72)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+                conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 73)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+                conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 73)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+                conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 74)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+                conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 74)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+                conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 75)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+                conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 75)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+                conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 76)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+                conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 76)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+                conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 77)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+                conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 77)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+                conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 78)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+                conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 78)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+                conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 79)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+                conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 79)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 17)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+                conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 17)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 80)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+                conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 80)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 18)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+                conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 18)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 81)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+                conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 81)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+                conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+                conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+                conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+                conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
               }
             }
           }
         }
-        for (i2.inner: int32, 0, 7) {
-          compute[((((blockIdx.x*784) + (floordiv(threadIdx.x, 7)*49)) + (i2.inner*7)) + floormod(threadIdx.x, 7))] = max((conv2d_nchw_1[i2.inner] + bias[((blockIdx.x*16) + floordiv(threadIdx.x, 7))]), 0f32)
+        for (i1.inner: int32, 0, 2) {
+          for (i3.inner: int32, 0, 7) {
+            compute[(((((blockIdx.x*784) + (floordiv(threadIdx.x, 7)*98)) + (i1.inner*49)) + (floormod(threadIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((blockIdx.x*16) + (floordiv(threadIdx.x, 7)*2)) + i1.inner)]), 0f32)
+          }
         }
       }
     }
@@ -365,7 +511,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.259 ms
+    Execution time of this operator: 0.401 ms
 
 
 
@@ -413,36 +559,36 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
-    conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
+    conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=2)
     conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
-    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=16)
+    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
     conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
-    conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=7)
+    conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
     conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
+    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
     conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
     conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
-    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
+    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
+    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
     conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=8)
-    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=1)
-    conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
-    conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
+    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=16)
+    conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
+    conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=3)
     conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
-    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
+    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
     s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2 [...]
     compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
     compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
     compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
-    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=16)
+    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
     compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
-    compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=7)
-    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+    compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
+    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
     compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
-    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
+    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
     compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
     s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
     s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -462,14 +608,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
     s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=12)
     s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
     s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
-    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 16)
+    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 1024)
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
 
     CUDA source code:
@@ -487,57 +633,186 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       #define int64_t long long
       #define uint64_t unsigned long long
     #endif
-    extern "C" __global__ void __launch_bounds__(112) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-      float conv2d_nchw[7];
-      __shared__ float pad_temp_shared[648];
-      __shared__ float kernel_shared[1152];
+    extern "C" __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+      float conv2d_nchw[14];
+      __shared__ float pad_temp_shared[2016];
+      __shared__ float kernel_shared[1536];
       conv2d_nchw[0] = 0.000000e+00f;
+      conv2d_nchw[7] = 0.000000e+00f;
       conv2d_nchw[1] = 0.000000e+00f;
+      conv2d_nchw[8] = 0.000000e+00f;
       conv2d_nchw[2] = 0.000000e+00f;
+      conv2d_nchw[9] = 0.000000e+00f;
       conv2d_nchw[3] = 0.000000e+00f;
+      conv2d_nchw[10] = 0.000000e+00f;
       conv2d_nchw[4] = 0.000000e+00f;
+      conv2d_nchw[11] = 0.000000e+00f;
       conv2d_nchw[5] = 0.000000e+00f;
+      conv2d_nchw[12] = 0.000000e+00f;
       conv2d_nchw[6] = 0.000000e+00f;
-      for (int rc_outer_outer = 0; rc_outer_outer < 64; ++rc_outer_outer) {
-        __syncthreads();
-        pad_temp_shared[((int)threadIdx.x)] = (((((9 <= (((int)threadIdx.x) % 81)) && ((((int)threadIdx.x) % 81) < 72)) && (1 <= (((int)threadIdx.x) % 9))) && ((((int)threadIdx.x) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + ((((int)threadIdx.x) / 81) * 49)) + (((((int)threadIdx.x) % 81) / 9) * 7)) + (((int)threadIdx.x) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 112)] = (((((9 <= ((((int)threadIdx.x) + 31) % 81)) && (((((int)threadIdx.x) + 31) % 81) < 72)) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 112) / 81) * 49)) + ((((((int)threadIdx.x) + 31) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 224)] = (((((9 <= ((((int)threadIdx.x) + 62) % 81)) && (((((int)threadIdx.x) + 62) % 81) < 72)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 224) / 81) * 49)) + ((((((int)threadIdx.x) + 62) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 336)] = (((((9 <= ((((int)threadIdx.x) + 12) % 81)) && (((((int)threadIdx.x) + 12) % 81) < 72)) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 336) / 81) * 49)) + ((((((int)threadIdx.x) + 12) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-        pad_temp_shared[(((int)threadIdx.x) + 448)] = (((((9 <= ((((int)threadIdx.x) + 43) % 81)) && (((((int)threadIdx.x) + 43) % 81) < 72)) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 448) / 81) * 49)) + ((((((int)threadIdx.x) + 43) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-        if (((int)threadIdx.x) < 88) {
-          pad_temp_shared[(((int)threadIdx.x) + 560)] = (((((9 <= ((((int)threadIdx.x) + 74) % 81)) && (((((int)threadIdx.x) + 74) % 81) < 72)) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 560) / 81) * 49)) + ((((((int)threadIdx.x) + 74) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-        }
-        kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) * 73728) + ((((int)threadIdx.x) / 72) * 4608)) + (rc_outer_outer * 72)) + (((int)threadIdx.x) % 72))];
-        kernel_shared[(((int)threadIdx.x) + 112)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 112) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 40) % 72) / 9) * 9)) + ((((((int)threadIdx.x) + 4) % 9) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 224)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 224) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 72) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 336)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 336) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 48) % 72) / 9) * 9)) + ((((((int)threadIdx.x) / 3) + 1) % 3) * 3)) + (((int)threadIdx.x) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 448)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 448) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 72) / 9) * 9)) + ((((((int)threadIdx.x) + 7) % 9) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 560)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 560) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 56) % 72) / 9) * 9)) + ((((int)threadIdx.x) + 2) % 9))];
-        kernel_shared[(((int)threadIdx.x) + 672)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 672) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 24) % 72) / 9) * 9)) + ((((((int)threadIdx.x) / 3) + 2) % 3) * 3)) + (((int)threadIdx.x) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 784)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 784) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 64) % 72) / 9) * 9)) + ((((int)threadIdx.x) + 1) % 9))];
-        kernel_shared[(((int)threadIdx.x) + 896)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 896) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 32) % 72) / 9) * 9)) + ((((((int)threadIdx.x) + 5) % 9) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-        kernel_shared[(((int)threadIdx.x) + 1008)] = kernel[(((((((int)blockIdx.x) * 73728) + ((((int)threadIdx.x) / 72) * 4608)) + (rc_outer_outer * 72)) + (((int)threadIdx.x) % 72)) + 64512)];
-        if (((int)threadIdx.x) < 32) {
-          kernel_shared[(((int)threadIdx.x) + 1120)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1120) / 72) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 9) * 9)) + ((((((int)threadIdx.x) + 4) % 9) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-        }
-        __syncthreads();
-        for (int rx_outer_inner = 0; rx_outer_inner < 3; ++rx_outer_inner) {
-          for (int rc_inner = 0; rc_inner < 8; ++rc_inner) {
-            for (int ry_inner = 0; ry_inner < 3; ++ry_inner) {
-              conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-              conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 9)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-              conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 18)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-              conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 27)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-              conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 36)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-              conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 45)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-              conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 54)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-            }
+      conv2d_nchw[13] = 0.000000e+00f;
+      for (int rc_outer_outer = 0; rc_outer_outer < 16; ++rc_outer_outer) {
+        for (int rx_outer_outer = 0; rx_outer_outer < 3; ++rx_outer_outer) {
+          __syncthreads();
+          pad_temp_shared[((((((int)threadIdx.x) * 12) / 7) * 7) + ((((int)threadIdx.x) * 5) % 7))] = (((((7 <= ((((int)threadIdx.x) * 12) % 63)) && (((((int)threadIdx.x) * 12) % 63) < 56)) && (1 <= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) && ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) * 12) / 63) * 49)) + ((((((int)threadIdx.x) * 12) % 63) / 7) * 7)) + rx_outer_outer) + ((((int)threadIdx.x) * 5) % 7)) -  [...]
+          pad_temp_shared[(((((((int)threadIdx.x) * 4) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 1) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((7 <= (((((int)threadIdx.x) * 12) + 1) % 63)) && ((((((int)threadIdx.x) * 12) + 1) % 63) < 56)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) * 4) / 21) * 49)) + (((((((int)thre [...]
+          pad_temp_shared[(((((((int)threadIdx.x) * 4) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 2) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((7 <= (((((int)threadIdx.x) * 12) + 2) % 63)) && ((((((int)threadIdx.x) * 12) + 2) % 63) < 56)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) * 4) / 21) * 49)) + (((((((int)thre [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 1) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 3) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((7 <= (((((int)threadIdx.x) * 12) + 3) % 63)) && ((((((int)threadIdx.x) * 12) + 3) % 63) < 56)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 1) / 21) * 49)) + ((( [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 1) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 4) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((7 <= (((((int)threadIdx.x) * 12) + 4) % 63)) && ((((((int)threadIdx.x) * 12) + 4) % 63) < 56)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 1) / 21) * 49)) + ((( [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 1) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 5) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 5) % 7))] = (((((7 <= (((((int)threadIdx.x) * 12) + 5) % 63)) && ((((((int)threadIdx.x) * 12) + 5) % 63) < 56)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 1) / 21) * 49)) + ((( [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 2) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 6) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 6) % 7))] = (((((7 <= (((((int)threadIdx.x) * 12) + 6) % 63)) && ((((((int)threadIdx.x) * 12) + 6) % 63) < 56)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 2) / 21) * 49)) + ((( [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 2) / 21) * 63) + (((((((int)threadIdx.x) * 12) / 7) + 1) % 9) * 7)) + ((((int)threadIdx.x) * 5) % 7))] = (((((1 <= ((((((int)threadIdx.x) * 12) / 7) + 1) % 9)) && (((((((int)threadIdx.x) * 12) / 7) + 1) % 9) < 8)) && (1 <= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) && ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 2) / 21) * 49)) + (((((((int)th [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 2) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 8) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((7 <= (((((int)threadIdx.x) * 12) + 8) % 63)) && ((((((int)threadIdx.x) * 12) + 8) % 63) < 56)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 2) / 21) * 49)) + ((( [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 3) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 9) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((7 <= (((((int)threadIdx.x) * 12) + 9) % 63)) && ((((((int)threadIdx.x) * 12) + 9) % 63) < 56)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 3) / 21) * 49)) + ((( [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 3) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 10) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((7 <= (((((int)threadIdx.x) * 12) + 10) % 63)) && ((((((int)threadIdx.x) * 12) + 10) % 63) < 56)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 3) / 21) * 49)) +  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 3) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 11) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((7 <= (((((int)threadIdx.x) * 12) + 11) % 63)) && ((((((int)threadIdx.x) * 12) + 11) % 63) < 56)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 3) / 21) * 49)) +  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 12) + 672) / 63) * 63) + (((((((int)threadIdx.x) * 12) / 7) + 6) % 9) * 7)) + ((((int)threadIdx.x) * 5) % 7))] = (((((1 <= ((((((int)threadIdx.x) * 12) / 7) + 6) % 9)) && (((((((int)threadIdx.x) * 12) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) && ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 12) + 672) / 63) * 49)) + ((((((( [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 224) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 1) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 1) / 7) + 6) % 9)) && ((((((((int)threadIdx.x) * 12) + 1) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 224) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 2) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 2) / 7) + 6) % 9)) && ((((((((int)threadIdx.x) * 12) + 2) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 3) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 3) / 7) + 6) % 9)) && ((((((((int)threadIdx.x) * 12) + 3) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 4) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 4) / 7) + 6) % 9)) && ((((((((int)threadIdx.x) * 12) + 4) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 5) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 5) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 5) / 7) + 6) % 9)) && ((((((((int)threadIdx.x) * 12) + 5) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 6) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 6) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 6) / 7) + 6) % 9)) && ((((((((int)threadIdx.x) * 12) + 6) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 21) * 63) + (((((((int)threadIdx.x) * 12) / 7) + 7) % 9) * 7)) + ((((int)threadIdx.x) * 5) % 7))] = (((((1 <= ((((((int)threadIdx.x) * 12) / 7) + 7) % 9)) && (((((((int)threadIdx.x) * 12) / 7) + 7) % 9) < 8)) && (1 <= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) && ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 226) / 21) * 49)) + (((((((in [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 8) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 8) / 7) + 6) % 9)) && ((((((((int)threadIdx.x) * 12) + 8) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 9) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 9) / 7) + 6) % 9)) && ((((((((int)threadIdx.x) * 12) + 9) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 10) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 10) / 7) + 6) % 9)) && ((((((((int)threadIdx.x) * 12) + 10) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx. [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 11) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 11) / 7) + 6) % 9)) && ((((((((int)threadIdx.x) * 12) + 11) / 7) + 6) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx. [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 12) + 1344) / 63) * 63) + (((((((int)threadIdx.x) * 12) / 7) + 3) % 9) * 7)) + ((((int)threadIdx.x) * 5) % 7))] = (((((1 <= ((((((int)threadIdx.x) * 12) / 7) + 3) % 9)) && (((((((int)threadIdx.x) * 12) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) && ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 12) + 1344) / 63) * 49)) + ((((( [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 448) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 1) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 1) / 7) + 3) % 9)) && ((((((((int)threadIdx.x) * 12) + 1) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 448) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 2) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 2) / 7) + 3) % 9)) && ((((((((int)threadIdx.x) * 12) + 2) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 449) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 3) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 3) / 7) + 3) % 9)) && ((((((((int)threadIdx.x) * 12) + 3) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 449) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 4) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 4) / 7) + 3) % 9)) && ((((((((int)threadIdx.x) * 12) + 4) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 449) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 5) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 5) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 5) / 7) + 3) % 9)) && ((((((((int)threadIdx.x) * 12) + 5) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 450) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 6) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 6) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 6) / 7) + 3) % 9)) && ((((((((int)threadIdx.x) * 12) + 6) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 450) / 21) * 63) + (((((((int)threadIdx.x) * 12) / 7) + 4) % 9) * 7)) + ((((int)threadIdx.x) * 5) % 7))] = (((((1 <= ((((((int)threadIdx.x) * 12) / 7) + 4) % 9)) && (((((((int)threadIdx.x) * 12) / 7) + 4) % 9) < 8)) && (1 <= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) && ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + 450) / 21) * 49)) + (((((((in [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 450) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 8) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 8) / 7) + 3) % 9)) && ((((((((int)threadIdx.x) * 12) + 8) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 451) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 9) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 9) / 7) + 3) % 9)) && ((((((((int)threadIdx.x) * 12) + 9) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x)  [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 451) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 10) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 10) / 7) + 3) % 9)) && ((((((((int)threadIdx.x) * 12) + 10) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx. [...]
+          pad_temp_shared[((((((((int)threadIdx.x) * 4) + 451) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 11) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((1 <= (((((((int)threadIdx.x) * 12) + 11) / 7) + 3) % 9)) && ((((((((int)threadIdx.x) * 12) + 11) / 7) + 3) % 9) < 8)) && (1 <= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) && ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) < 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx. [...]
+          kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 56)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 56) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 56) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 112)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 112) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 16) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 168)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 168) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) / 3) + 24) & 31) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 224)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 224) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 32) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 280)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 280) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 88) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 336)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 336) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) / 3) + 16) & 31) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 392)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 392) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 8) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 448)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 448) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 64) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 504)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 504) / 96) * 4608)) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 72)];
+          kernel_shared[(((int)threadIdx.x) + 560)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 560) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 80) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 616)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 616) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 40) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 672)] = kernel[(((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 32256)];
+          kernel_shared[(((int)threadIdx.x) + 728)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 728) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 56) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 784)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 784) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 16) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 840)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 840) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) / 3) + 24) & 31) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 896)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 896) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 32) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 952)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 952) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 88) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 1008)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1008) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) / 3) + 16) & 31) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 1064)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1064) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 8) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 1120)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1120) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 64) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 1176)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1176) / 96) * 4608)) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 72)];
+          kernel_shared[(((int)threadIdx.x) + 1232)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1232) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 80) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 1288)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1288) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 40) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[(((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 64512)];
+          kernel_shared[(((int)threadIdx.x) + 1400)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1400) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 56) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+          kernel_shared[(((int)threadIdx.x) + 1456)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1456) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 16) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+          if (((int)threadIdx.x) < 24) {
+            kernel_shared[(((int)threadIdx.x) + 1512)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1512) / 96) * 4608)) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 216)];
+          }
+          __syncthreads();
+          for (int rc_outer_inner = 0; rc_outer_inner < 16; ++rc_outer_inner) {
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7))] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7))] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 63)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 63)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 1)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 64)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 64)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 2)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 65)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 65)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 3)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 66)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 66)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 4)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 67)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 67)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 5)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 68)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 68)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 6)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 69)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 69)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 70)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 70)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 71)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 71)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 9)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 9)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 72)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 72)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 73)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 73)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 74)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 74)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 75)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 75)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 76)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 76)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 77)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 77)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 78)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 78)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 79)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 79)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 17)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 17)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 80)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 80)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 18)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 18)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 81)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 81)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
           }
         }
       }
-      for (int i2_inner = 0; i2_inner < 7; ++i2_inner) {
-        compute[((((((int)blockIdx.x) * 784) + ((((int)threadIdx.x) / 7) * 49)) + (i2_inner * 7)) + (((int)threadIdx.x) % 7))] = max((conv2d_nchw[i2_inner] + bias[((((int)blockIdx.x) * 16) + (((int)threadIdx.x) / 7))]), 0.000000e+00f);
+      for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
+        for (int i3_inner = 0; i3_inner < 7; ++i3_inner) {
+          compute[(((((((int)blockIdx.x) * 784) + ((((int)threadIdx.x) / 7) * 98)) + (i1_inner * 49)) + ((((int)threadIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[(((((int)blockIdx.x) * 16) + ((((int)threadIdx.x) / 7) * 2)) + i1_inner)]), 0.000000e+00f);
+        }
       }
     }
 
@@ -599,7 +874,7 @@ In the example below we resume the status and do more 5 trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  27.225 seconds)
+   **Total running time of the script:** ( 3 minutes  14.110 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index f1340b6ce..6f0c53348 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -647,7 +647,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-       9.6846       9.6935       9.7301       9.6302       0.0413   
+       9.7934       9.8060       9.8157       9.7584       0.0251   
                
 
 
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index e8667e4da..2bf3819fe 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -666,7 +666,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      753.6402     753.3775     754.2426     753.3006      0.4271   
+      773.7814     773.5620     774.6151     773.1672      0.6112   
                
 
 
@@ -694,7 +694,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  23.284 seconds)
+   **Total running time of the script:** ( 1 minutes  21.555 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index a7c39df9f..0ae3c280d 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -397,75 +397,77 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
                  placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
       buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-      preflattened_buffer_map = {placeholder_7: placeholder_15: Buffer(placeholder_12, int32, [4916], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), placeholder_6: placeholder_18: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], [])} {
-      for (i0.outer.i1.outer.fused: int32, 0, 256) "parallel" {
-        allocate(compute_4: Pointer(global float32), float32, [256]), storage_scope = global {
-          for (nb_j.inner: int32, 0, 2) {
-            for (i.inner.init: int32, 0, 8) {
-              let cse_var_1: int32 = ((i.inner.init*32) + (nb_j.inner*16))
-               {
-                compute_5: Buffer(compute_4, float32, [256], [])[cse_var_1] = 0f32
-                compute_5[(cse_var_1 + 1)] = 0f32
-                compute_5[(cse_var_1 + 2)] = 0f32
-                compute_5[(cse_var_1 + 3)] = 0f32
-                compute_5[(cse_var_1 + 4)] = 0f32
-                compute_5[(cse_var_1 + 5)] = 0f32
-                compute_5[(cse_var_1 + 6)] = 0f32
-                compute_5[(cse_var_1 + 7)] = 0f32
-                compute_5[(cse_var_1 + 8)] = 0f32
-                compute_5[(cse_var_1 + 9)] = 0f32
-                compute_5[(cse_var_1 + 10)] = 0f32
-                compute_5[(cse_var_1 + 11)] = 0f32
-                compute_5[(cse_var_1 + 12)] = 0f32
-                compute_5[(cse_var_1 + 13)] = 0f32
-                compute_5[(cse_var_1 + 14)] = 0f32
-                compute_5[(cse_var_1 + 15)] = 0f32
-              }
-            }
-            for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
-              for (i.inner: int32, 0, 8) {
-                let cse_var_21: int32 = (elem_idx*16)
-                let cse_var_20: int32 = ((i.inner*32) + (nb_j.inner*16))
-                let cse_var_19: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
-                let cse_var_18: int32 = ((floordiv(i0.outer.i1.outer.fused, 16)*2048) + (i.inner*256))
-                let cse_var_17: int32 = (cse_var_20 + 9)
-                let cse_var_16: int32 = (cse_var_20 + 8)
-                let cse_var_15: int32 = (cse_var_20 + 7)
-                let cse_var_14: int32 = (cse_var_20 + 6)
-                let cse_var_13: int32 = (cse_var_20 + 5)
-                let cse_var_12: int32 = (cse_var_20 + 4)
-                let cse_var_11: int32 = (cse_var_20 + 3)
-                let cse_var_10: int32 = (cse_var_20 + 2)
-                let cse_var_9: int32 = (cse_var_20 + 15)
-                let cse_var_8: int32 = (cse_var_20 + 14)
-                let cse_var_7: int32 = (cse_var_20 + 13)
-                let cse_var_6: int32 = (cse_var_20 + 12)
-                let cse_var_5: int32 = (cse_var_20 + 11)
-                let cse_var_4: int32 = (cse_var_20 + 10)
-                let cse_var_3: int32 = (cse_var_20 + 1)
+      preflattened_buffer_map = {placeholder_9: placeholder_15: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_6: placeholder_17: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_8: placeholder_18: Buffer(placeholder_13, int32, [33], []), placeholder_7: placeholder_19: Buffer(placeholder_12, int32, [4916], [])} {
+      for (i0.outer.i1.outer.fused: int32, 0, 16) "parallel" {
+        allocate(compute_4: Pointer(global float32), float32, [4096]), storage_scope = global {
+          for (i.outer.inner: int32, 0, 4) {
+            for (nb_j.inner: int32, 0, 2) {
+              for (i.inner.init: int32, 0, 32) {
+                let cse_var_1: int32 = (((i.outer.inner*1024) + (i.inner.init*32)) + (nb_j.inner*16))
                  {
-                  compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[((placeholder_3[cse_var_19]*16) + cse_var_21)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5: Buffer(compute_4, float32, [4096], [])[cse_var_1] = 0f32
+                  compute_5[(cse_var_1 + 1)] = 0f32
+                  compute_5[(cse_var_1 + 2)] = 0f32
+                  compute_5[(cse_var_1 + 3)] = 0f32
+                  compute_5[(cse_var_1 + 4)] = 0f32
+                  compute_5[(cse_var_1 + 5)] = 0f32
+                  compute_5[(cse_var_1 + 6)] = 0f32
+                  compute_5[(cse_var_1 + 7)] = 0f32
+                  compute_5[(cse_var_1 + 8)] = 0f32
+                  compute_5[(cse_var_1 + 9)] = 0f32
+                  compute_5[(cse_var_1 + 10)] = 0f32
+                  compute_5[(cse_var_1 + 11)] = 0f32
+                  compute_5[(cse_var_1 + 12)] = 0f32
+                  compute_5[(cse_var_1 + 13)] = 0f32
+                  compute_5[(cse_var_1 + 14)] = 0f32
+                  compute_5[(cse_var_1 + 15)] = 0f32
+                }
+              }
+              for (elem_idx: int32, 0, let cse_var_2: int32 = ((i0.outer.i1.outer.fused*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
+                for (i.inner: int32, 0, 32) {
+                  let cse_var_21: int32 = (elem_idx*16)
+                  let cse_var_20: int32 = ((i0.outer.i1.outer.fused*2) + nb_j.inner)
+                  let cse_var_19: int32 = ((i.outer.inner*8192) + (i.inner*256))
+                  let cse_var_18: int32 = (((i.outer.inner*1024) + (i.inner*32)) + (nb_j.inner*16))
+                  let cse_var_17: int32 = (cse_var_18 + 9)
+                  let cse_var_16: int32 = (cse_var_18 + 8)
+                  let cse_var_15: int32 = (cse_var_18 + 7)
+                  let cse_var_14: int32 = (cse_var_18 + 6)
+                  let cse_var_13: int32 = (cse_var_18 + 5)
+                  let cse_var_12: int32 = (cse_var_18 + 4)
+                  let cse_var_11: int32 = (cse_var_18 + 3)
+                  let cse_var_10: int32 = (cse_var_18 + 2)
+                  let cse_var_9: int32 = (cse_var_18 + 15)
+                  let cse_var_8: int32 = (cse_var_18 + 14)
+                  let cse_var_7: int32 = (cse_var_18 + 13)
+                  let cse_var_6: int32 = (cse_var_18 + 12)
+                  let cse_var_5: int32 = (cse_var_18 + 11)
+                  let cse_var_4: int32 = (cse_var_18 + 10)
+                  let cse_var_3: int32 = (cse_var_18 + 1)
+                   {
+                    compute_5[cse_var_18] = (compute_5[cse_var_18] + (placeholder_1[((placeholder_3[cse_var_20]*16) + cse_var_21)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                    compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                  }
                 }
               }
             }
           }
-          for (i0.inner: int32, 0, 8) {
-            let cse_var_22: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
+          for (i0.inner: int32, 0, 128) {
+            let cse_var_22: int32 = ((i0.inner*512) + (i0.outer.i1.outer.fused*32))
             compute[ramp(cse_var_22, 1, 32)] = max((compute_5[ramp((i0.inner*32), 1, 32)] + placeholder_4[ramp(cse_var_22, 1, 32)]), broadcast(0f32, 32))
           }
         }
@@ -522,7 +524,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 1.927 ms
+    Execution time of this operator: 1.753 ms
 
 
 
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index d50551c1a..5c877c0a3 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,16 +5,16 @@
 
 Computation times
 =================
-**00:43.953** total execution time for **how_to_tune_with_autotvm** files:
+**00:43.715** total execution time for **how_to_tune_with_autotvm** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:43.919 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:43.679 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.019 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.021 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)             | 00:00.005 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)               | 00:00.005 | 0.0 MB |
-+--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``) | 00:00.005 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)               | 00:00.005 | 0.0 MB |
++--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index 801d3768b..be3b4972e 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -892,8 +892,8 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 4, 32]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2885496
-    No: 6   GFLOPS: 110.85/110.85   result: MeasureResult(costs=(0.0020884737708333333,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8389439582824707, timestamp=1658167012.8734102)      [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
-    No: 7   GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 6   GFLOPS: 42.37/42.37     result: MeasureResult(costs=(0.005463718842105263,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6390187740325928, timestamp=1658180905.033303)        [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
+    No: 7   GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1016,7 +1016,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 16, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 256, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6225319
-    No: 8   GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 8   GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1139,7 +1139,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,943546
-    No: 9   GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 9   GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1262,7 +1262,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 16, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 16, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2868708
-    No: 10  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 10  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
         res = future.result()
       File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
@@ -1280,7 +1280,7 @@ for this template
     TimeoutError
 
             [('tile_f', [-1, 32, 2, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4691833
-    No: 11  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 11  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1403,7 +1403,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 2, 64]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,1042124
-    No: 12  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 12  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1526,7 +1526,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 16]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10013405
-    No: 13  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 13  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1649,7 +1649,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 8, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6732082
-    No: 14  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 14  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1772,7 +1772,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 4, 32]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7536735
-    No: 15  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 15  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1895,7 +1895,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 128, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,482121
-    No: 16  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 16  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2018,7 +2018,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2824525
-    No: 17  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 17  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2141,7 +2141,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4559286
-    No: 18  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 18  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2264,7 +2264,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 32, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9677544
-    No: 19  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+    No: 19  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 738, in __call__
         yield remote, remote.load_module(os.path.split(build_result.filename)[1])
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 702, in run_through_rpc
@@ -2352,7 +2352,7 @@ for this template
       15: _PyEval_EvalFrameDefault
       14: 0x0000000000537c30
       13: _PyObject_FastCallKeywords
-      12: 0x00007f0c1aea4fa2
+      12: 0x00007f6238feefa2
       11: _ctypes_callproc
       10: ffi_call
       9: ffi_call_unix64
@@ -2417,7 +2417,7 @@ for this template
       21: _PyFunction_FastCallKeywords
       20: _PyEval_EvalFrameDefault
       19: _PyFunction_FastCall      [('tile_f', [-1, 8, 2, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6390073
-    No: 20  GFLOPS: 144.67/144.67   result: MeasureResult(costs=(0.00160021674,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.448404312133789, timestamp=1658167038.7930841)       [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
+    No: 20  GFLOPS: 144.06/144.06   result: MeasureResult(costs=(0.00160695119,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.440112590789795, timestamp=1658180931.5978546)       [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
 
 
 
@@ -2474,7 +2474,7 @@ and measure running time.
     Best config:
     [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
     Finish loading 20 records
-    Time cost of this operator: 0.002015
+    Time cost of this operator: 0.002017
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index 72aa02068..bda3c6b39 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -329,10 +329,10 @@ Timing the untuned program
     ########## Build without Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  311.2     98.724   (1, 2, 10, 10, 3)  2       1        [311.2]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.046     0.966    (1, 6, 10, 10)     1       1        [3.046]           
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.977     0.31     (1, 1, 10, 10, 3)  1       1        [0.977]           
-    Total_time                                    -                                             315.222   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  309.7     98.726   (1, 2, 10, 10, 3)  2       1        [309.7]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.017     0.962    (1, 6, 10, 10)     1       1        [3.017]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.98      0.312    (1, 1, 10, 10, 3)  1       1        [0.98]            
+    Total_time                                    -                                             313.697   -        -                  -       -        -                 
 
 
 
@@ -398,10 +398,10 @@ Timing the tuned program
     ########## Build with Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  247.6     98.819   (1, 1, 10, 10, 6)  2       1        [247.6]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.987     0.793    (1, 6, 10, 10)     1       1        [1.987]           
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.972     0.388    (1, 1, 10, 10, 3)  1       1        [0.972]           
-    Total_time                                    -                                             250.559   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  121.8     97.83    (1, 6, 10, 10, 1)  2       1        [121.8]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.747     1.403    (1, 6, 10, 10)     1       1        [1.747]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.955     0.767    (1, 1, 10, 10, 3)  1       1        [0.955]           
+    Total_time                                    -                                             124.502   -        -                  -       -        -                 
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
index 698fa6eb4..8caba4f93 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
@@ -225,7 +225,7 @@ take about **2 minutes** to download the Stanford Cars, while COCO 2017 validati
  .. code-block:: none
 
 
-    '/tmp/tmpvfj7zyx7/images/random'
+    '/tmp/tmpzq69y51r/images/random'
 
 
 
@@ -325,8 +325,8 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
  .. code-block:: none
 
-    /tmp/tmpvfj7zyx7/images/target contains 8144 images
-    /tmp/tmpvfj7zyx7/images/random contains 5000 images
+    /tmp/tmpzq69y51r/images/target contains 8144 images
+    /tmp/tmpzq69y51r/images/random contains 5000 images
 
 
 
@@ -501,13 +501,13 @@ the time on our validation set).
  .. code-block:: none
 
     Epoch 1/3
-    328/328 - 55s - loss: 0.2419 - accuracy: 0.9193 - val_loss: 0.1380 - val_accuracy: 0.9532
+    328/328 - 55s - loss: 0.2177 - accuracy: 0.9237 - val_loss: 0.1470 - val_accuracy: 0.9520
     Epoch 2/3
-    328/328 - 53s - loss: 0.0997 - accuracy: 0.9630 - val_loss: 0.1219 - val_accuracy: 0.9626
+    328/328 - 53s - loss: 0.0957 - accuracy: 0.9647 - val_loss: 0.1194 - val_accuracy: 0.9630
     Epoch 3/3
-    328/328 - 53s - loss: 0.0688 - accuracy: 0.9738 - val_loss: 0.1334 - val_accuracy: 0.9569
+    328/328 - 52s - loss: 0.0658 - accuracy: 0.9757 - val_loss: 0.1752 - val_accuracy: 0.9426
 
-    <keras.callbacks.History object at 0x7f21783f5090>
+    <keras.callbacks.History object at 0x7f3384193890>
 
 
 
@@ -864,7 +864,7 @@ Arduino tutorial for how to do that `on GitHub <https://github.com/guberti/tvm-a
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 4 minutes  25.546 seconds)
+   **Total running time of the script:** ( 4 minutes  48.383 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_train.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index 0ab020762..dba17b36c 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**05:13.418** total execution time for **how_to_work_with_microtvm** files:
+**05:34.881** total execution time for **how_to_work_with_microtvm** files:
 
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 04:25.546 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 04:48.383 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:44.502 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:43.192 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.369 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.305 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)             | 00:00.001 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index 241f00ebd..8d109aae7 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:11.545** total execution time for **how_to_work_with_relay** files:
+**00:12.436** total execution time for **how_to_work_with_relay** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``) | 00:10.023 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``) | 00:10.597 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                   | 00:01.515 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                   | 00:01.833 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)       | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
index afb105192..48dab7a59 100644
--- a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
@@ -261,7 +261,7 @@ The following example customizes CUDA lowering rule for :code:`exp`.
  .. code-block:: none
 
 
-    <function my_cuda_math_rule at 0x7f20f80f7e60>
+    <function my_cuda_math_rule at 0x7f32f4985830>
 
 
 
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index 1c1fb5e80..d811d0b80 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,22 +5,22 @@
 
 Computation times
 =================
-**00:04.162** total execution time for **how_to_work_with_schedules** files:
+**00:04.303** total execution time for **how_to_work_with_schedules** files:
 
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:01.919 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:01.941 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:00.991 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:01.106 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.538 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.541 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.525 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.531 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.103 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.101 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.043 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.040 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.028 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.029 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)               | 00:00.015 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index 3125e206f..ecf5bfaae 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -347,7 +347,7 @@ The importing needs to happen before the tensorized GEMV being executed.
                  C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C}
       preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmp9ffzmr_l/input0.cc'\nsource_filename = \"/tmp/tmp9ffzmr_l/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpjlp7kihb/input0.cc'\nsource_filename = \"/tmp/tmpjlp7kihb/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
       for (i, 0, 1024) {
         for (j.outer: int32, 0, 32) {
           @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index 71cdd769d..d6381bbe9 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:22.204** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:21.427** total execution time for **topic_vta_tutorials_autotvm** files:
 
 +---------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:22.197 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:21.420 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)     | 00:00.006 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index 770a1720e..83f08fb59 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -291,7 +291,7 @@ The compilation steps are:
       DeprecationWarning,
     /workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the  new recommended usage.
       relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
-    resnet18_v1 inference graph built in 24.04s!
+    resnet18_v1 inference graph built in 23.67s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index 71b1e6e39..76a86eba4 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -335,7 +335,7 @@ The compilation steps are:
       "target_host parameter is going to be deprecated. "
     /workspace/python/tvm/relay/build_module.py:411: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
       DeprecationWarning,
-    yolov3-tiny inference graph built in 16.81s!
+    yolov3-tiny inference graph built in 15.79s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index e1dd76f51..aa5dcaa7f 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**01:32.191** total execution time for **topic_vta_tutorials_frontend** files:
+**01:32.900** total execution time for **topic_vta_tutorials_frontend** files:
 
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:48.302 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:48.791 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:43.889 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:44.109 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index 8c38f0024..daac687dd 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:03.340** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.279** total execution time for **topic_vta_tutorials_optimize** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.931 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.865 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.409 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.414 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index eb0ea1a2a..30bbbf58c 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:00.729** total execution time for **topic_vta_tutorials** files:
+**00:00.777** total execution time for **topic_vta_tutorials** files:
 
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.392 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.424 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.337 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.353 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index b1709e77c..af85da576 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -205,13 +205,6 @@ trials, we can load the best schedule from the log file and apply it.
 
 
 
-.. rst-class:: sphx-glr-script-out
-
- .. code-block:: none
-
-    *E
-
-
 
 
 
@@ -335,7 +328,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 94.350 ms
+    Execution time of this operator: 93.226 ms
 
 
 
@@ -451,11 +444,6 @@ Expression (TE) language that demonstrates how TVM can optimize computational
 operations.
 
 
-.. rst-class:: sphx-glr-timing
-
-   **Total running time of the script:** ( 1 minutes  14.482 seconds)
-
-
 .. _sphx_glr_download_tutorial_auto_scheduler_matmul_x86.py:
 
 .. only:: html
diff --git a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
index 92590ee2c..6a703b6ac 100644
--- a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
@@ -462,16 +462,16 @@ reduce variance, we take 5 measurements and average them.
     waiting for device...
     device available
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 10.24/10.24     result: MeasureResult(costs=(0.0262102764,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5559689998626709, timestamp=1658165860.1578383)       [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
-    No: 2   GFLOPS: 2.91/10.24      result: MeasureResult(costs=(0.0922320586,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6032614707946777, timestamp=1658165861.7975442)       [('tile_y', [-1, 4]), ('tile_x', [-1, 8])],None,32
-    No: 3   GFLOPS: 11.72/11.72     result: MeasureResult(costs=(0.022902535,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5882463455200195, timestamp=1658165862.869192) [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
-    No: 4   GFLOPS: 1.86/11.72      result: MeasureResult(costs=(0.14466019400000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.440467596054077, timestamp=1658165865.8920832) [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
-    No: 5   GFLOPS: 3.63/11.72      result: MeasureResult(costs=(0.0738574394,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3178660869598389, timestamp=1658165867.8718855)       [('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
-    No: 6   GFLOPS: 1.73/11.72      result: MeasureResult(costs=(0.1547792328,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.6386325359344482, timestamp=1658165870.552622)        [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
-    No: 7   GFLOPS: 0.86/11.72      result: MeasureResult(costs=(0.3108912492,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.09726619720459, timestamp=1658165875.6951525) [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
-    No: 8   GFLOPS: 10.59/11.72     result: MeasureResult(costs=(0.0253523318,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5732951164245605, timestamp=1658165876.2851822)       [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
-    No: 9   GFLOPS: 1.77/11.72      result: MeasureResult(costs=(0.15187112239999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.553722381591797, timestamp=1658165878.95927)   [('tile_y', [-1, 2]), ('tile_x', [-1, 2])],None,11
-    No: 10  GFLOPS: 2.62/11.72      result: MeasureResult(costs=(0.1025046486,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7477610111236572, timestamp=1658165880.7645075)       [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
+    No: 1   GFLOPS: 8.15/8.15       result: MeasureResult(costs=(0.0329551682,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6634762287139893, timestamp=1658179791.689761)        [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
+    No: 2   GFLOPS: 2.83/8.15       result: MeasureResult(costs=(0.0947758338,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6653764247894287, timestamp=1658179793.8905063)       [('tile_y', [-1, 4]), ('tile_x', [-1, 8])],None,32
+    No: 3   GFLOPS: 11.84/11.84     result: MeasureResult(costs=(0.022662860599999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6025173664093018, timestamp=1658179794.460175)        [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
+    No: 4   GFLOPS: 1.76/11.84      result: MeasureResult(costs=(0.1523976374,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5654454231262207, timestamp=1658179797.58456) [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
+    No: 5   GFLOPS: 3.65/11.84      result: MeasureResult(costs=(0.0736083214,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3125183582305908, timestamp=1658179799.0248601)       [('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
+    No: 6   GFLOPS: 1.51/11.84      result: MeasureResult(costs=(0.177441876,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.0150091648101807, timestamp=1658179802.0840669)        [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
+    No: 7   GFLOPS: 0.87/11.84      result: MeasureResult(costs=(0.3084976306,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.058420896530151, timestamp=1658179807.7101648)        [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
+    No: 8   GFLOPS: 10.69/11.84     result: MeasureResult(costs=(0.0251169682,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5440967082977295, timestamp=1658179808.2757041)       [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
+    No: 9   GFLOPS: 1.91/11.84      result: MeasureResult(costs=(0.1404379644,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.3505797386169434, timestamp=1658179810.7455938)       [('tile_y', [-1, 2]), ('tile_x', [-1, 2])],None,11
+    No: 10  GFLOPS: 2.77/11.84      result: MeasureResult(costs=(0.0967519658,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6531894207000732, timestamp=1658179812.457256)        [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
 
 
 
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index b311113e0..c63ff8b86 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -327,7 +327,7 @@ standard deviation.
 
  .. code-block:: none
 
-    {'mean': 497.4388198099996, 'median': 497.54519420000065, 'std': 0.9764619767640734}
+    {'mean': 493.15499541000463, 'median': 492.9314198500151, 'std': 0.81322431603063}
 
 
 
@@ -563,31 +563,31 @@ the tuning data to.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.37/  17.37 GFLOPS | Progress: (4/20) | 6.47 s
    [Task  1/25]  Current/Best:    6.16/  17.37 GFLOPS | Progress: (8/20) | 9.50 s
    [Task  1/25]  Current/Best:   11.51/  22.72 GFLOPS | Progress: (12/20) | 11.95 s
    [Task  1/25]  Current/Best:   16.57/  22.78 GFLOPS | Progress: (16/20) | 13.64 s
    [Task  1/25]  Current/Best:   11.54/  23.87 GFLOPS | Progress: (20/20) | 15.39 s Done.
-
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.11/  12.80 GFLOPS | Progress: (4/20) | 3.82 s
    [Task  2/25]  Current/Best:   14.13/  17.91 GFLOPS | Progress: (8/20) | 5.15 s
    [Task  2/25]  Current/Best:   20.76/  20.76 GFLOPS | Progress: (12/20) | 6.49 s
    [Task  2/25]  Current/Best:   12.35/  20.76 GFLOPS | Progress: (16/20) | 7.78 s
    [Task  2/25]  Current/Best:   18.94/  20.76 GFLOPS | Progress: (20/20) | 9.40 s Done.
-
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.54 GFLOPS | Progress: (4/20) | 5.89 s
    [Task  3/25]  Current/Best:   15.54/  16.82 GFLOPS | Progress: (8/20) | 7.82 s
    [Task  3/25]  Current/Best:   14.86/  16.82 GFLOPS | Progress: (12/20) | 9.54 s
    [Task  3/25]  Current/Best:    7.19/  23.65 GFLOPS | Progress: (16/20) | 11.48 s
    [Task  3/25]  Current/Best:   12.63/  23.65 GFLOPS | Progress: (20/20) | 16.01 s Done.
-
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.57/  20.40 GFLOPS | Progress: (4/20) | 2.42 s
    [Task  4/25]  Current/Best:    6.86/  20.40 GFLOPS | Progress: (8/20) | 6.75 s
    [Task  4/25]  Current/Best:   22.16/  22.16 GFLOPS | Progress: (12/20) | 11.24 s
    [Task  4/25]  Current/Best:   17.18/  22.16 GFLOPS | Progress: (16/20) | 13.48 s
    [Task  4/25]  Current/Best:   12.85/  22.16 GFLOPS | Progress: (20/20) | 15.40 s Done.
-
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.40/  10.15 GFLOPS | Progress: (4/20) | 2.66 s
    [Task  5/25]  Current/Best:   11.64/  12.91 GFLOPS | Progress: (8/20) | 4.74 s
    [Task  5/25]  Current/Best:   10.41/  18.03 GFLOPS | Progress: (12/20) | 7.70 s
    [Task  5/25]  Current/Best:   11.74/  22.59 GFLOPS | Progress: (16/20) | 9.12 s
    [Task  5/25]  Current/Best:   11.74/  22.59 GFLOPS | Progress: (20/20) | 10.99 s Done.
-
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.25/  20.70 GFLOPS | Progress: (4/20) | 4.01 s
    [Task  6/25]  Current/Best:   18.96/  20.70 GFLOPS | Progress: (8/20) | 5.77 s
    [Task  6/25]  Current/Best:   13.29/  20.70 GFLOPS | Progress: (12/20) | 7.71 s
    [Task  6/25]  Current/Best:   19.91/  20.70 GFLOPS | Progress: (16/20) | 9.98 s
    [Task  6/25]  Current/Best:    3.75/  20.70 GFLOPS | Progress: (20/20) | 12.50 s Done.
-
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.10/  12.81 GFLOPS | Progress: (4/20) | 3.68 s
    [Task  7/25]  Current/Best:   20.14/  20.99 GFLOPS | Progress: (8/20) | 5.20 s
    [Task  7/25]  Current/Best:   15.84/  20.99 GFLOPS | Progress: (12/20) | 7.11 s
    [Task  7/25]  Current/Best:   12.26/  20.99 GFLOPS | Progress: (16/20) | 9.19 s
    [Task  7/25]  Current/Best:    6.43/  21.53 GFLOPS | Progress: (20/20) | 11.65 s Done.
-
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   10.17/  13.92 GFLOPS | Progress: (4/20) | 2.94 s
    [Task  8/25]  Current/Best:    9.61/  13.92 GFLOPS | Progress: (8/20) | 7.76 s
    [Task  8/25]  Current/Best:   12.63/  13.92 GFLOPS | Progress: (12/20) | 13.94 s
    [Task  8/25]  Current/Best:   18.95/  18.95 GFLOPS | Progress: (16/20) | 16.05 s
    [Task  8/25]  Current/Best:   20.21/  20.21 GFLOPS | Progress: (20/20) | 22.62 s Done.
-
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.11/  15.59 GFLOPS | Progress: (4/20) | 12.01 s
    [Task  9/25]  Current/Best:   22.96/  22.96 GFLOPS | Progress: (8/20) | 13.91 s
    [Task  9/25]  Current/Best:    8.22/  22.96 GFLOPS | Progress: (12/20) | 16.31 s
    [Task  9/25]  Current/Best:   17.62/  22.96 GFLOPS | Progress: (16/20) | 18.91 s
    [Task  9/25]  Current/Best:    9.11/  22.96 GFLOPS | Progress: (20/20) | 26.61 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.41/  18.41 GFLOPS | Progress: (4/20) | 2.63 s
    [Task 10/25]  Current/Best:   15.48/  18.41 GFLOPS | Progress: (8/20) | 4.23 s
    [Task 10/25]  Current/Best:   12.44/  19.05 GFLOPS | Progress: (12/20) | 5.76 s
    [Task 10/25]  Current/Best:   19.10/  20.22 GFLOPS | Progress: (16/20) | 6.88 s
    [Task 10/25]  Current/Best:    8.85/  20.22 GFLOPS | Progress: (20/20
 ) | 8.45 s Done.
-
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   11.55/  18.07 GFLOPS | Progress: (4/20) | 3.42 s
    [Task 11/25]  Current/Best:   16.41/  18.07 GFLOPS | Progress: (8/20) | 6.17 s
    [Task 11/25]  Current/Best:   18.00/  18.07 GFLOPS | Progress: (12/20) | 8.23 s
    [Task 11/25]  Current/Best:   13.40/  21.13 GFLOPS | Progress: (16/20) | 11.04 s
    [Task 11/25]  Current/Best:   19.34/  21.46 GFLOPS | Progress: (20/20) | 13.06 s Done.
-
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.78/  17.91 GFLOPS | Progress: (4/20) | 5.49 s
    [Task 12/25]  Current/Best:    5.16/  17.91 GFLOPS | Progress: (8/20) | 9.21 s
    [Task 12/25]  Current/Best:   19.20/  19.20 GFLOPS | Progress: (12/20) | 11.20 s
    [Task 12/25]  Current/Best:   14.06/  19.20 GFLOPS | Progress: (16/20) | 14.04 s
    [Task 12/25]  Current/Best:   15.10/  19.20 GFLOPS | Progress: (20/20) | 15.96 s Done.
-
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.70/  17.25 GFLOPS | Progress: (4/20) | 3.73 s
    [Task 13/25]  Current/Best:   15.76/  20.83 GFLOPS | Progress: (8/20) | 6.19 s
    [Task 13/25]  Current/Best:   19.46/  21.37 GFLOPS | Progress: (12/20) | 9.16 s
    [Task 13/25]  Current/Best:   12.22/  21.37 GFLOPS | Progress: (16/20) | 12.55 s
    [Task 13/25]  Current/Best:   18.65/  21.37 GFLOPS | Progress: (20/20) | 14.79 s Done.
-
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.62/  13.62 GFLOPS | Progress: (4/20) | 3.38 s
    [Task 14/25]  Current/Best:    6.06/  13.62 GFLOPS | Progress: (8/20) | 5.55 s
    [Task 14/25]  Current/Best:   20.61/  20.61 GFLOPS | Progress: (12/20) | 8.10 s
    [Task 14/25]  Current/Best:   16.49/  20.61 GFLOPS | Progress: (16/20) | 9.75 s Done.
-
    [Task 14/25]  Current/Best:   17.13/  20.61 GFLOPS | Progress: (20/20) | 11.53 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   16.17/  17.66 GFLOPS | Progress: (4/20) | 2.77 s
    [Task 15/25]  Current/Best:   14.36/  18.05 GFLOPS | Progress: (8/20) | 4.11 s
    [Task 15/25]  Current/Best:   10.37/  22.23 GFLOPS | Progress: (12/20) | 6.22 s
    [Task 15/25]  Current/Best:   20.41/  22.23 GFLOPS | Progress: (16/20) | 9.12 s
    [Task 15/25]  Current/Best:    9.68/  22.23 GFLOPS | Progress: (20/20) | 10.10 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.21/  20.21 GFLOPS | Progress: (4/20) | 3.02 s
    [Task 16/25]  Current/Best:    3.04/  20.21 GFLOPS | Progress: (8/20) | 4.65 s
    [Task 16/25]  Current/Best:   19.17/  20.21 GFLOPS | Progress: (12/20) | 5.88 s
    [Task 16/25]  Current/Best:   17.38/  20.21 GFLOPS | Progress: (16/20) |
  7.23 s
    [Task 16/25]  Current/Best:    9.99/  21.89 GFLOPS | Progress: (20/20) | 9.30 s Done.
-
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   14.30/  18.82 GFLOPS | Progress: (4/20) | 4.78 s
    [Task 17/25]  Current/Best:   14.38/  22.94 GFLOPS | Progress: (8/20) | 7.70 s
    [Task 17/25]  Current/Best:   17.18/  22.94 GFLOPS | Progress: (12/20) | 9.76 s
    [Task 17/25]  Current/Best:   17.12/  22.94 GFLOPS | Progress: (16/20) | 11.90 s
    [Task 17/25]  Current/Best:   10.02/  22.94 GFLOPS | Progress: (20/20) | 14.05 s Done.
-
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   10.91/  16.32 GFLOPS | Progress: (4/20) | 3.76 s
    [Task 18/25]  Current/Best:   10.61/  19.52 GFLOPS | Progress: (8/20) | 7.22 s
    [Task 18/25]  Current/Best:   19.15/  19.52 GFLOPS | Progress: (12/20) | 9.15 s
    [Task 18/25]  Current/Best:   10.05/  19.52 GFLOPS | Progress: (16/20) | 12.75 s
    [Task 18/25]  Current/Best:   20.73/  20.73 GFLOPS | Progress: (20/20) | 14.25 s Done.
-
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    6.91/  20.07 GFLOPS | Progress: (4/20) | 6.11 s
    [Task 19/25]  Current/Best:    2.61/  20.07 GFLOPS | Progress: (8/20) | 9.42 s
    [Task 19/25]  Current/Best:   19.36/  20.92 GFLOPS | Progress: (12/20) | 12.25 s
    [Task 19/25]  Current/Best:   15.50/  21.03 GFLOPS | Progress: (16/20) | 15.11 s
    [Task 19/25]  Current/Best:    2.70/  23.09 GFLOPS | Progress: (20/20) | 17.92 s Done.
-
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    9.12/  14.81 GFLOPS | Progress: (4/20) | 3.38 s Done.
+
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.46/  17.46 GFLOPS | Progress: (4/20) | 6.75 s
    [Task  1/25]  Current/Best:    6.16/  17.46 GFLOPS | Progress: (8/20) | 9.23 s
    [Task  1/25]  Current/Best:   11.57/  22.81 GFLOPS | Progress: (12/20) | 11.64 s
    [Task  1/25]  Current/Best:   16.79/  22.81 GFLOPS | Progress: (16/20) | 13.32 s
    [Task  1/25]  Current/Best:   11.65/  23.92 GFLOPS | Progress: (20/20) | 15.06 s Done.
+
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.26/  13.21 GFLOPS | Progress: (4/20) | 3.74 s
    [Task  2/25]  Current/Best:   14.23/  18.19 GFLOPS | Progress: (8/20) | 5.05 s
    [Task  2/25]  Current/Best:   21.30/  21.30 GFLOPS | Progress: (12/20) | 6.37 s
    [Task  2/25]  Current/Best:   12.66/  21.30 GFLOPS | Progress: (16/20) | 7.62 s
    [Task  2/25]  Current/Best:   20.02/  21.30 GFLOPS | Progress: (20/20) | 9.21 s Done.
+
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.60 GFLOPS | Progress: (4/20) | 5.85 s
    [Task  3/25]  Current/Best:   15.59/  16.91 GFLOPS | Progress: (8/20) | 7.77 s
    [Task  3/25]  Current/Best:   14.94/  16.91 GFLOPS | Progress: (12/20) | 9.47 s
    [Task  3/25]  Current/Best:    7.17/  23.85 GFLOPS | Progress: (16/20) | 11.38 s
    [Task  3/25]  Current/Best:   12.63/  23.85 GFLOPS | Progress: (20/20) | 15.88 s Done.
+
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.54/  20.46 GFLOPS | Progress: (4/20) | 2.37 s
    [Task  4/25]  Current/Best:    6.86/  20.46 GFLOPS | Progress: (8/20) | 6.64 s
    [Task  4/25]  Current/Best:   21.72/  21.72 GFLOPS | Progress: (12/20) | 11.03 s
    [Task  4/25]  Current/Best:   16.47/  21.72 GFLOPS | Progress: (16/20) | 13.27 s
    [Task  4/25]  Current/Best:   13.44/  21.72 GFLOPS | Progress: (20/20) | 15.30 s Done.
+
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.69/  10.30 GFLOPS | Progress: (4/20) | 2.60 s
    [Task  5/25]  Current/Best:   11.87/  12.82 GFLOPS | Progress: (8/20) | 4.68 s
    [Task  5/25]  Current/Best:   11.79/  18.08 GFLOPS | Progress: (12/20) | 7.75 s
    [Task  5/25]  Current/Best:   11.67/  22.58 GFLOPS | Progress: (16/20) | 9.22 s
    [Task  5/25]  Current/Best:   12.02/  22.58 GFLOPS | Progress: (20/20) | 11.07 s Done.
+
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.19/  20.74 GFLOPS | Progress: (4/20) | 3.93 s
    [Task  6/25]  Current/Best:   19.01/  20.74 GFLOPS | Progress: (8/20) | 5.70 s
    [Task  6/25]  Current/Best:   13.32/  20.74 GFLOPS | Progress: (12/20) | 7.63 s
    [Task  6/25]  Current/Best:   19.98/  20.74 GFLOPS | Progress: (16/20) | 9.90 s
    [Task  6/25]  Current/Best:    3.72/  20.74 GFLOPS | Progress: (20/20) | 12.44 s Done.
+
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   10.96/  12.97 GFLOPS | Progress: (4/20) | 3.54 s
    [Task  7/25]  Current/Best:   20.34/  21.21 GFLOPS | Progress: (8/20) | 5.04 s
    [Task  7/25]  Current/Best:   16.10/  21.21 GFLOPS | Progress: (12/20) | 6.93 s
    [Task  7/25]  Current/Best:   12.25/  21.21 GFLOPS | Progress: (16/20) | 8.98 s
    [Task  7/25]  Current/Best:    6.35/  21.51 GFLOPS | Progress: (20/20) | 11.45 s Done.
+
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:    9.94/  14.03 GFLOPS | Progress: (4/20) | 2.90 s
    [Task  8/25]  Current/Best:    9.46/  14.03 GFLOPS | Progress: (8/20) | 7.64 s
    [Task  8/25]  Current/Best:   12.97/  14.03 GFLOPS | Progress: (12/20) | 13.70 s
    [Task  8/25]  Current/Best:   18.78/  18.78 GFLOPS | Progress: (16/20) | 15.81 s
    [Task  8/25]  Current/Best:   19.74/  19.74 GFLOPS | Progress: (20/20) | 22.21 s Done.
+
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.28/  15.78 GFLOPS | Progress: (4/20) | 11.98 s
    [Task  9/25]  Current/Best:   23.33/  23.33 GFLOPS | Progress: (8/20) | 13.79 s
    [Task  9/25]  Current/Best:    8.20/  23.33 GFLOPS | Progress: (12/20) | 16.11 s
    [Task  9/25]  Current/Best:   17.92/  23.33 GFLOPS | Progress: (16/20) | 18.64 s
    [Task  9/25]  Current/Best:    9.20/  23.33 GFLOPS | Progress: (20/20) | 26.21 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.40/  18.40 GFLOPS | Progress: (4/20) | 2.57 s
    [Task 10/25]  Current/Best:   15.58/  18.40 GFLOPS | Progress: (8/20) | 4.14 s
    [Task 10/25]  Current/Best:   13.15/  19.05 GFLOPS | Progress: (12/20) | 5.66 s
    [Task 10/25]  Current/Best:   19.21/  20.63 GFLOPS | Progress: (16/20) | 6.78 s
    [Task 10/25]  Current/Best:    8.87/  20.63 GFLOPS | Progress: (20/20
 ) | 8.31 s Done.
+
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.34/  18.15 GFLOPS | Progress: (4/20) | 3.31 s
    [Task 11/25]  Current/Best:   16.87/  18.15 GFLOPS | Progress: (8/20) | 6.01 s
    [Task 11/25]  Current/Best:   17.99/  18.15 GFLOPS | Progress: (12/20) | 8.07 s
    [Task 11/25]  Current/Best:   13.34/  21.21 GFLOPS | Progress: (16/20) | 10.85 s
    [Task 11/25]  Current/Best:   19.47/  21.62 GFLOPS | Progress: (20/20) | 12.88 s Done.
+
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.82/  18.11 GFLOPS | Progress: (4/20) | 5.34 s
    [Task 12/25]  Current/Best:    5.20/  18.11 GFLOPS | Progress: (8/20) | 8.98 s
    [Task 12/25]  Current/Best:   19.02/  19.02 GFLOPS | Progress: (12/20) | 11.00 s
    [Task 12/25]  Current/Best:   15.45/  19.02 GFLOPS | Progress: (16/20) | 13.73 s
    [Task 12/25]  Current/Best:   15.14/  19.02 GFLOPS | Progress: (20/20) | 15.65 s Done.
+
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.61/  17.20 GFLOPS | Progress: (4/20) | 3.69 s
    [Task 13/25]  Current/Best:   16.00/  21.15 GFLOPS | Progress: (8/20) | 6.11 s
    [Task 13/25]  Current/Best:   19.61/  21.58 GFLOPS | Progress: (12/20) | 8.93 s
    [Task 13/25]  Current/Best:   12.21/  21.58 GFLOPS | Progress: (16/20) | 12.34 s
    [Task 13/25]  Current/Best:   18.81/  21.58 GFLOPS | Progress: (20/20) | 14.64 s Done.
+
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.40/  13.40 GFLOPS | Progress: (4/20) | 3.34 s
    [Task 14/25]  Current/Best:    6.12/  13.40 GFLOPS | Progress: (8/20) | 5.53 s
    [Task 14/25]  Current/Best:   20.71/  20.71 GFLOPS | Progress: (12/20) | 8.10 s
    [Task 14/25]  Current/Best:   16.41/  20.71 GFLOPS | Progress: (16/20) | 9.74 s Done.
+
    [Task 14/25]  Current/Best:   17.11/  20.71 GFLOPS | Progress: (20/20) | 11.46 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   16.19/  17.71 GFLOPS | Progress: (4/20) | 2.71 s
    [Task 15/25]  Current/Best:   14.47/  18.10 GFLOPS | Progress: (8/20) | 4.05 s
    [Task 15/25]  Current/Best:   10.39/  22.29 GFLOPS | Progress: (12/20) | 6.12 s
    [Task 15/25]  Current/Best:   20.42/  22.29 GFLOPS | Progress: (16/20) | 9.02 s
    [Task 15/25]  Current/Best:    9.71/  22.29 GFLOPS | Progress: (20/20) | 9.99 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.36/  20.36 GFLOPS | Progress: (4/20) | 2.98 s
    [Task 16/25]  Current/Best:    3.02/  20.36 GFLOPS | Progress: (8/20) | 4.60 s
    [Task 16/25]  Current/Best:   19.37/  20.36 GFLOPS | Progress: (12/20) | 5.82 s
    [Task 16/25]  Current/Best:   17.35/  20.36 GFLOPS | Progress: (16/20) | 
 7.18 s
    [Task 16/25]  Current/Best:    9.99/  22.39 GFLOPS | Progress: (20/20) | 9.21 s Done.
+
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   13.02/  18.79 GFLOPS | Progress: (4/20) | 4.71 s
    [Task 17/25]  Current/Best:   14.50/  23.36 GFLOPS | Progress: (8/20) | 7.57 s
    [Task 17/25]  Current/Best:   16.95/  23.36 GFLOPS | Progress: (12/20) | 9.65 s
    [Task 17/25]  Current/Best:   16.55/  23.36 GFLOPS | Progress: (16/20) | 11.79 s
    [Task 17/25]  Current/Best:   10.02/  23.36 GFLOPS | Progress: (20/20) | 13.90 s Done.
+
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   10.96/  16.93 GFLOPS | Progress: (4/20) | 3.71 s
    [Task 18/25]  Current/Best:   10.52/  19.98 GFLOPS | Progress: (8/20) | 7.13 s
    [Task 18/25]  Current/Best:   19.18/  19.98 GFLOPS | Progress: (12/20) | 9.07 s
    [Task 18/25]  Current/Best:    9.99/  19.98 GFLOPS | Progress: (16/20) | 12.62 s
    [Task 18/25]  Current/Best:   20.39/  20.39 GFLOPS | Progress: (20/20) | 14.15 s Done.
+
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    7.20/  20.35 GFLOPS | Progress: (4/20) | 6.05 s
    [Task 19/25]  Current/Best:    2.60/  20.35 GFLOPS | Progress: (8/20) | 9.34 s
    [Task 19/25]  Current/Best:   20.44/  21.72 GFLOPS | Progress: (12/20) | 12.11 s
    [Task 19/25]  Current/Best:   14.55/  21.72 GFLOPS | Progress: (16/20) | 14.95 s
    [Task 19/25]  Current/Best:    2.70/  23.44 GFLOPS | Progress: (20/20) | 17.74 s Done.
+
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    8.96/  15.02 GFLOPS | Progress: (4/20) | 3.33 s Done.
      Done.
-
    [Task 20/25]  Current/Best:   10.26/  14.81 GFLOPS | Progress: (8/20) | 6.70 s
    [Task 20/25]  Current/Best:    2.32/  16.61 GFLOPS | Progress: (12/20) | 10.65 s
    [Task 20/25]  Current/Best:   12.16/  16.61 GFLOPS | Progress: (16/20) | 14.44 s
    [Task 20/25]  Current/Best:   12.70/  21.76 GFLOPS | Progress: (20/20) | 16.52 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.39/  17.69 GFLOPS | Progress: (4/20) | 3.29 s
    [Task 21/25]  Current/Best:   14.44/  17.69 GFLOPS | Progress: (8/20) | 4.85 s
    [Task 21/25]  Current/Best:    1.61/  17.69 GFLOPS | Progress: (12/20) | 7.00 s
    [Task 21/25]  Current/Best:   18.08/  18.08 GFLOPS | Progress: (16/20) | 10.50 s
    [Task 21/25]  Current/Best:    4.47/  18.08 GFLOPS | Progress: (20/20) | 17.78 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.70/  16.95 GFLOPS | Progress: (4/20
 ) | 2.72 s
    [Task 22/25]  Current/Best:    8.87/  21.87 GFLOPS | Progress: (8/20) | 4.70 s
    [Task 22/25]  Current/Best:   20.05/  21.87 GFLOPS | Progress: (12/20) | 7.03 s
    [Task 22/25]  Current/Best:   14.92/  21.87 GFLOPS | Progress: (16/20) | 9.08 s
    [Task 22/25]  Current/Best:   15.18/  21.87 GFLOPS | Progress: (20/20) | 10.76 s Done.
-
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.39/  20.21 GFLOPS | Progress: (4/20) | 3.30 s
    [Task 23/25]  Current/Best:   15.70/  20.21 GFLOPS | Progress: (8/20) | 6.67 s
    [Task 23/25]  Current/Best:   20.82/  21.47 GFLOPS | Progress: (12/20) | 8.50 s
    [Task 23/25]  Current/Best:    6.34/  21.47 GFLOPS | Progress: (16/20) | 15.63 s
    [Task 23/25]  Current/Best:    7.66/  21.47 GFLOPS | Progress: (20/20) | 19.91 s Done.
-
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.42/   8.42 GFLOPS | Progress: (4/20) | 11.82 s
    [Task 24/25]  Current/Best:    2.07/   8.42 GFLOPS | Progress: (8/20) | 22.89 s
    [Task 24/25]  Current/Best:    4.33/   8.42 GFLOPS | Progress: (12/20) | 34.47 s Done.
+
    [Task 20/25]  Current/Best:    9.68/  15.02 GFLOPS | Progress: (8/20) | 6.76 s
    [Task 20/25]  Current/Best:    2.32/  16.87 GFLOPS | Progress: (12/20) | 10.68 s
    [Task 20/25]  Current/Best:   12.52/  16.87 GFLOPS | Progress: (16/20) | 14.23 s
    [Task 20/25]  Current/Best:   12.92/  22.17 GFLOPS | Progress: (20/20) | 16.35 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.41/  17.71 GFLOPS | Progress: (4/20) | 3.23 s
    [Task 21/25]  Current/Best:   14.66/  17.71 GFLOPS | Progress: (8/20) | 4.78 s
    [Task 21/25]  Current/Best:    1.61/  17.71 GFLOPS | Progress: (12/20) | 6.91 s
    [Task 21/25]  Current/Best:   18.22/  18.22 GFLOPS | Progress: (16/20) | 10.35 s
    [Task 21/25]  Current/Best:    4.46/  18.22 GFLOPS | Progress: (20/20) | 17.33 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.70/  17.03 GFLOPS | Progress: (4/20
 ) | 2.69 s
    [Task 22/25]  Current/Best:    8.82/  21.99 GFLOPS | Progress: (8/20) | 4.59 s
    [Task 22/25]  Current/Best:   20.05/  21.99 GFLOPS | Progress: (12/20) | 6.87 s
    [Task 22/25]  Current/Best:   15.30/  21.99 GFLOPS | Progress: (16/20) | 8.96 s
    [Task 22/25]  Current/Best:   14.08/  21.99 GFLOPS | Progress: (20/20) | 10.61 s Done.
+
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.46/  20.53 GFLOPS | Progress: (4/20) | 3.24 s
    [Task 23/25]  Current/Best:   15.83/  20.53 GFLOPS | Progress: (8/20) | 6.61 s
    [Task 23/25]  Current/Best:   20.81/  21.48 GFLOPS | Progress: (12/20) | 8.44 s
    [Task 23/25]  Current/Best:    6.35/  21.48 GFLOPS | Progress: (16/20) | 15.53 s
    [Task 23/25]  Current/Best:    7.83/  21.48 GFLOPS | Progress: (20/20) | 19.75 s Done.
+
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.68/   8.68 GFLOPS | Progress: (4/20) | 11.79 s
    [Task 24/25]  Current/Best:    2.14/   8.68 GFLOPS | Progress: (8/20) | 22.85 s
    [Task 24/25]  Current/Best:    4.51/   8.68 GFLOPS | Progress: (12/20) | 34.37 s Done.
      Done.
-
    [Task 24/25]  Current/Best:    7.03/   8.80 GFLOPS | Progress: (16/20) | 39.95 s
    [Task 24/25]  Current/Best:    3.29/   8.99 GFLOPS | Progress: (20/20) | 45.96 s Done.
-
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.55/   2.91 GFLOPS | Progress: (4/20) | 11.62 s
    [Task 25/25]  Current/Best:    5.54/   7.90 GFLOPS | Progress: (8/20) | 22.96 s
    [Task 25/25]  Current/Best:    5.89/   7.90 GFLOPS | Progress: (12/20) | 34.38 s
    [Task 25/25]  Current/Best:    5.72/   9.27 GFLOPS | Progress: (16/20) | 36.13 s
    [Task 25/25]  Current/Best:    2.90/   9.27 GFLOPS | Progress: (20/20) | 46.81 s
+
    [Task 24/25]  Current/Best:    6.20/   8.89 GFLOPS | Progress: (16/20) | 39.75 s
    [Task 24/25]  Current/Best:    3.40/   8.89 GFLOPS | Progress: (20/20) | 45.51 s Done.
+
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.55/   2.94 GFLOPS | Progress: (4/20) | 11.59 s
    [Task 25/25]  Current/Best:    5.94/   8.25 GFLOPS | Progress: (8/20) | 22.85 s
    [Task 25/25]  Current/Best:    5.97/   8.25 GFLOPS | Progress: (12/20) | 34.24 s
    [Task 25/25]  Current/Best:    5.79/   8.71 GFLOPS | Progress: (16/20) | 36.17 s
    [Task 25/25]  Current/Best:    2.87/   8.99 GFLOPS | Progress: (20/20) | 46.83 s
 
 
 
@@ -748,8 +748,8 @@ improvement in comparing the optimized model to the unoptimized model.
 
  .. code-block:: none
 
-    optimized: {'mean': 415.98398269000427, 'median': 416.41282425000554, 'std': 0.8904766088764614}
-    unoptimized: {'mean': 497.4388198099996, 'median': 497.54519420000065, 'std': 0.9764619767640734}
+    optimized: {'mean': 411.12996603999363, 'median': 410.6953183499854, 'std': 0.8527653131833021}
+    unoptimized: {'mean': 493.15499541000463, 'median': 492.9314198500151, 'std': 0.81322431603063}
 
 
 
@@ -772,7 +772,7 @@ profiling/benchmarking.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 10 minutes  21.504 seconds)
+   **Total running time of the script:** ( 10 minutes  13.054 seconds)
 
 
 .. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index bfb95cc6b..fa7f9f89f 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -282,7 +282,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.251e-07 secs/op
+    1.286e-07 secs/op
 
 
 
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index 0b2784581..f57177d88 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -263,7 +263,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, placeholder(a, 0x45d1da0)), stage(b, placeholder(b, 0xcd2b4a0)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min= [...]
+    [stage(a, placeholder(a, 0x22d05700)), stage(b, placeholder(b, 0x23d08550)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(mi [...]
 
 
 
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index fd484eee9..802b1e41b 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**13:35.106** total execution time for **tutorial** files:
+**12:58.496** total execution time for **tutorial** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 10:21.504 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 10:13.054 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 01:14.482 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 00:59.998 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 01:02.007 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 00:49.310 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:30.908 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:30.027 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:24.163 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:24.317 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:01.159 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:00.932 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.709 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.699 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.166 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.151 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.006 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.005 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)                             | 00:00.001 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index 15e7f718a..35ed521f4 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -301,8 +301,8 @@ helper function to run a profile of the TVM generated code.
 
  .. code-block:: none
 
-    Numpy running time: 0.000008
-    naive: 0.000006
+    Numpy running time: 0.000007
+    naive: 0.000007
 
 
 
@@ -512,10 +512,10 @@ We can now compare the different schedules
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                   numpy    8.489330000429617e-06                    1.0
-                   naive    5.881399999999999e-06     0.6927990783374378
-                parallel              6.1597e-06      0.7255814062697854
-                  vector    2.4731900000000003e-05     2.913292332698623
+                   numpy    6.835749995843799e-06                    1.0
+                   naive              6.7348e-06      0.9852320526781733
+                parallel    6.056700000000001e-06     0.8860329888721109
+                  vector    2.4511599999999998e-05    3.5857952697075355
 
 
 
@@ -936,7 +936,7 @@ matrix multiplication.
 
  .. code-block:: none
 
-    Numpy running time: 0.018762
+    Numpy running time: 0.018614
 
 
 
@@ -996,7 +996,7 @@ optimizations.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    none: 3.457954
+    none: 3.355106
 
 
 
@@ -1101,7 +1101,7 @@ schedule.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    blocking: 0.327590
+    blocking: 0.294225
 
 
 
@@ -1199,7 +1199,7 @@ already cache friendly from our previous optimizations.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    vectorization: 0.349008
+    vectorization: 0.323604
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1275,7 +1275,7 @@ more cache friendly.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    loop permutation: 0.118152
+    loop permutation: 0.118319
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1376,7 +1376,7 @@ optimized schedule.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    array packing: 0.108498
+    array packing: 0.110985
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1471,7 +1471,7 @@ to `C` when all the block results are ready.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    block caching: 0.111071
+    block caching: 0.115142
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1559,7 +1559,7 @@ of thread-level parallelization.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    parallelization: 0.145116
+    parallelization: 0.145455
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1640,13 +1640,13 @@ working, we can compare the results.
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                    none      3.4579537459000003                     1.0
-                blocking            0.3275901657     0.09473526535408826
-           vectorization            0.3490081212     0.10092908894857523
-        loop permutation            0.1181516901    0.034168094422919675
-           array packing     0.10849824770000001     0.03137643117078803
-           block caching            0.1110707222     0.03212036087286982
-         parallelization            0.1451162748      0.0419659386630201
+                    none      3.3551055356999995                     1.0
+                blocking            0.2942247807     0.08769464255872172
+           vectorization            0.3236043196      0.0964513086568182
+        loop permutation             0.118319215    0.035265422723972296
+           array packing     0.11098535790000001     0.03307954301856092
+           block caching     0.11514179059999999    0.034318381158158454
+         parallelization     0.14545543239999997     0.04335345963108507
 
 
 
@@ -1686,11 +1686,6 @@ operations with tunable parameters that allows you to automatically optimize
 the computation for specific platforms.
 
 
-.. rst-class:: sphx-glr-timing
-
-   **Total running time of the script:** ( 1 minutes  2.007 seconds)
-
-
 .. _sphx_glr_download_tutorial_tensor_expr_get_started.py:
 
 .. only:: html
diff --git a/docs/arch/device_target_interactions.html b/docs/arch/device_target_interactions.html
index e6cdc6ad5..094a0db90 100644
--- a/docs/arch/device_target_interactions.html
+++ b/docs/arch/device_target_interactions.html
@@ -517,8 +517,8 @@ different code generation targets can run on the same physical device.
 (e.g. The <code class="docutils literal notranslate"><span class="pre">&quot;llvm&quot;</span></code> and <code class="docutils literal notranslate"><span class="pre">&quot;c&quot;</span></code> targets both run on the <code class="docutils literal notranslate"><span class="pre">kDLCPU</span></code>
 device type.)</p>
 <p>All options for a specific target kind are added with the
-<code class="docutils literal notranslate"><span class="pre">add_attr_option</span></code> function, with optional default values.  A
-preprocessor can be added with <code class="docutils literal notranslate"><span class="pre">set_attrs_preprocessor</span></code> to define
+<code class="docutils literal notranslate"><span class="pre">add_attr_option</span></code> function, with optional default values.  A <cite>Target</cite>
+parser can be added with <code class="docutils literal notranslate"><span class="pre">set_target_parser</span></code> to process
 any parameters that are dynamically based on other parameters or
 queried from device properties.</p>
 <p>This argument definition defines a parser that can unpack a string
diff --git a/docs/commit_hash b/docs/commit_hash
index b9049e4a0..ab0302a8d 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-c79b8f1aac0d11811257123127402d0ceff9135e
+6bad21e9fe711e6994df238e8a3edc89073b894b
diff --git a/docs/how_to/compile_models/from_darknet.html b/docs/how_to/compile_models/from_darknet.html
index 1b16919bd..4e0c6b019 100644
--- a/docs/how_to/compile_models/from_darknet.html
+++ b/docs/how_to/compile_models/from_darknet.html
@@ -569,7 +569,7 @@ class:[&#39;truck 0.9266&#39;] left:471 top:83 right:689 bottom:169
 class:[&#39;bicycle 0.9984&#39;] left:111 top:113 right:577 bottom:447
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  1.615 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  2.060 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-darknet-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7716f96385bd5abb6e822041e285be54/from_darknet.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_darknet.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index 3621b1f9c..a9dca1d06 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -422,7 +422,7 @@ to download the full example code</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;x&quot;</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">x</span><span class="o">.</span><span class="n">shape</span></a><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip6f6e2b1b-6c0c-4741-84cf-56756c687355 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipd556f310-f866-47c2-99dd-fcd8f5721d18 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
 x (1, 3, 224, 224)
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index 7c6c8e518..2607059e9 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -427,11 +427,13 @@ python3 -m pip install -f https://release.oneflow.info <span class="nv">oneflow<
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip&quot; to /workspace/.oneflow/flowvision_cache/resnet18.zip
 
   0%|          | 0.00/41.5M [00:00&lt;?, ?B/s]
- 13%|#2        | 5.19M/41.5M [00:00&lt;00:01, 37.3MB/s]
- 24%|##4       | 10.1M/41.5M [00:00&lt;00:00, 43.3MB/s]
- 56%|#####5    | 23.2M/41.5M [00:00&lt;00:00, 81.6MB/s]
- 76%|#######5  | 31.4M/41.5M [00:00&lt;00:00, 74.9MB/s]
-100%|##########| 41.5M/41.5M [00:00&lt;00:00, 76.4MB/s]
+ 15%|#5        | 6.33M/41.5M [00:00&lt;00:00, 61.3MB/s]
+ 29%|##9       | 12.2M/41.5M [00:00&lt;00:00, 51.9MB/s]
+ 41%|####1     | 17.2M/41.5M [00:00&lt;00:00, 32.5MB/s]
+ 58%|#####7    | 24.0M/41.5M [00:00&lt;00:00, 35.0MB/s]
+ 82%|########2 | 34.1M/41.5M [00:00&lt;00:00, 51.7MB/s]
+ 96%|#########6| 40.0M/41.5M [00:00&lt;00:00, 53.2MB/s]
+100%|##########| 41.5M/41.5M [00:00&lt;00:00, 48.9MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_pytorch.html b/docs/how_to/compile_models/from_pytorch.html
index 733e91fea..b4da32af8 100644
--- a/docs/how_to/compile_models/from_pytorch.html
+++ b/docs/how_to/compile_models/from_pytorch.html
@@ -409,9 +409,10 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/resnet18-f37072fd.pth&quot; to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
 
   0%|          | 0.00/44.7M [00:00&lt;?, ?B/s]
- 39%|###8      | 17.4M/44.7M [00:00&lt;00:00, 182MB/s]
- 91%|######### | 40.5M/44.7M [00:00&lt;00:00, 217MB/s]
-100%|##########| 44.7M/44.7M [00:00&lt;00:00, 217MB/s]
+ 11%|#         | 4.89M/44.7M [00:00&lt;00:00, 51.3MB/s]
+ 22%|##1       | 9.78M/44.7M [00:00&lt;00:00, 48.3MB/s]
+ 75%|#######5  | 33.5M/44.7M [00:00&lt;00:00, 137MB/s]
+100%|##########| 44.7M/44.7M [00:00&lt;00:00, 132MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_tensorflow.html b/docs/how_to/compile_models/from_tensorflow.html
index 187a31f5f..6b0f4226a 100644
--- a/docs/how_to/compile_models/from_tensorflow.html
+++ b/docs/how_to/compile_models/from_tensorflow.html
@@ -631,7 +631,7 @@ banana (score = 0.00022)
 desk (score = 0.00019)
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  3.149 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  1.017 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-tensorflow-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7f1d3d1b878694c201c614c807cdebc8/from_tensorflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_tensorflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/sg_execution_times.html b/docs/how_to/compile_models/sg_execution_times.html
index 2e4736154..e155ea534 100644
--- a/docs/how_to/compile_models/sg_execution_times.html
+++ b/docs/how_to/compile_models/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-compile-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:02.908</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
+<p><strong>04:59.918</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 81%" />
@@ -330,44 +330,44 @@
 <col style="width: 8%" />
 </colgroup>
 <tbody>
-<tr class="row-odd"><td><p><a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></td>
-<td><p>01:03.149</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></td>
+<td><p>01:02.060</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></td>
-<td><p>01:01.615</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></td>
+<td><p>01:01.017</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></td>
-<td><p>00:39.676</p></td>
+<td><p>00:38.176</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></td>
-<td><p>00:27.293</p></td>
+<td><p>00:27.033</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></td>
-<td><p>00:24.874</p></td>
+<td><p>00:26.167</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></td>
-<td><p>00:24.873</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></td>
+<td><p>00:23.911</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></td>
-<td><p>00:24.266</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></td>
+<td><p>00:23.600</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></td>
-<td><p>00:19.690</p></td>
+<td><p>00:20.890</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></td>
-<td><p>00:15.150</p></td>
+<td><p>00:14.675</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></td>
-<td><p>00:02.321</p></td>
+<td><p>00:02.389</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/deploy_models/deploy_model_on_android.html b/docs/how_to/deploy_models/deploy_model_on_android.html
index 152d52b8a..e89eb9b7e 100644
--- a/docs/how_to/deploy_models/deploy_model_on_android.html
+++ b/docs/how_to/deploy_models/deploy_model_on_android.html
@@ -648,7 +648,7 @@ to the remote android device.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  16.2943      16.2940      16.5826      16.1340       0.1329
+  15.5571      15.5695      15.7189      15.3987       0.0868
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
index beb782c51..6ad3b3a1c 100644
--- a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
+++ b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
@@ -431,13 +431,14 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth&quot; to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
 
   0%|          | 0.00/170M [00:00&lt;?, ?B/s]
- 10%|#         | 17.4M/170M [00:00&lt;00:00, 182MB/s]
- 25%|##5       | 42.9M/170M [00:00&lt;00:00, 232MB/s]
- 42%|####2     | 71.8M/170M [00:00&lt;00:00, 264MB/s]
- 59%|#####9    | 100M/170M [00:00&lt;00:00, 278MB/s]
- 75%|#######4  | 127M/170M [00:00&lt;00:00, 261MB/s]
- 89%|########9 | 152M/170M [00:00&lt;00:00, 257MB/s]
-100%|##########| 170M/170M [00:00&lt;00:00, 254MB/s]
+ 11%|#1        | 19.2M/170M [00:00&lt;00:00, 202MB/s]
+ 25%|##5       | 43.0M/170M [00:00&lt;00:00, 230MB/s]
+ 41%|####      | 69.5M/170M [00:00&lt;00:00, 252MB/s]
+ 56%|#####6    | 95.5M/170M [00:00&lt;00:00, 260MB/s]
+ 71%|#######   | 120M/170M [00:00&lt;00:00, 255MB/s]
+ 85%|########5 | 145M/170M [00:00&lt;00:00, 252MB/s]
+ 99%|#########9| 169M/170M [00:00&lt;00:00, 248MB/s]
+100%|##########| 170M/170M [00:00&lt;00:00, 248MB/s]
 /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
   for i in range(dim)
 /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the &#39;trunc&#39; function NOT &#39;floor&#39;). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode=&#39;trunc&#39;), or for actual floor division, use torch.div(a, b, rounding_mode=&#39;floor&#39;).
@@ -532,7 +533,7 @@ torchvision rcnn models.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get 9 valid boxes
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  4.193 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  55.583 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-object-detection-pytorch-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7795da4b258c8feff986668b95ef57ad/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized.html b/docs/how_to/deploy_models/deploy_prequantized.html
index 0d16818ac..2cc13883f 100644
--- a/docs/how_to/deploy_models/deploy_prequantized.html
+++ b/docs/how_to/deploy_models/deploy_prequantized.html
@@ -475,7 +475,7 @@ training. Other models require a full post training calibration.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/mobilenet_v2-b0353104.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
 
   0%|          | 0.00/13.6M [00:00&lt;?, ?B/s]
-100%|##########| 13.6M/13.6M [00:00&lt;00:00, 182MB/s]
+100%|##########| 13.6M/13.6M [00:00&lt;00:00, 145MB/s]
 </pre></div>
 </div>
 </div>
@@ -564,7 +564,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  90.4726      90.3123      98.9719      90.1457       0.8869
+  90.3941      90.2763      96.8060      90.0492       0.7160
 </pre></div>
 </div>
 <div class="admonition note">
@@ -603,7 +603,7 @@ This includes support for the VNNI 8 bit dot product instruction (CascadeLake or
 <div class="section" id="deploy-a-quantized-tflite-model">
 <h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline">¶</a></h2>
 <p>TODO</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  10.526 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  8.056 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized_tflite.html b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
index 9d6161886..87d6c1e89 100644
--- a/docs/how_to/deploy_models/deploy_prequantized_tflite.html
+++ b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
@@ -568,7 +568,7 @@ TFLite Top-5 labels: [387 102 386 341 349]
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  120.6853     120.6432     122.8147     119.8759      0.4554
+  120.4923     120.0808     146.6039     119.2306      2.9008
 </pre></div>
 </div>
 <div class="admonition note">
@@ -596,7 +596,7 @@ network for ARM CPU</span></a>.</p></li>
 </ul>
 </div></blockquote>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  52.888 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  51.317 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-tflite-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/56691c7a27d45da61d112276334640d3/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_quantized.html b/docs/how_to/deploy_models/deploy_quantized.html
index e3302ef1b..8356a3eca 100644
--- a/docs/how_to/deploy_models/deploy_quantized.html
+++ b/docs/how_to/deploy_models/deploy_quantized.html
@@ -504,7 +504,7 @@ for calibration. But the accuracy might be impacted.</p>
   DeprecationWarning,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  7.718 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  21.420 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
index 7b1edd208..b3e7338e5 100644
--- a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
+++ b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
@@ -436,24 +436,23 @@ to your device.</p>
 Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
 
   0%|          | 0/132723 [00:00&lt;?, ?KB/s]
-  4%|4         | 5309/132723 [00:00&lt;00:02, 50874.49KB/s]
- 10%|9         | 13108/132723 [00:00&lt;00:01, 66534.77KB/s]
- 16%|#5        | 20979/132723 [00:00&lt;00:01, 72038.19KB/s]
- 22%|##1       | 29043/132723 [00:00&lt;00:01, 75411.18KB/s]
- 28%|##7       | 37032/132723 [00:00&lt;00:01, 77016.96KB/s]
- 34%|###3      | 45005/132723 [00:00&lt;00:01, 77933.04KB/s]
- 40%|###9      | 52806/132723 [00:00&lt;00:01, 77914.47KB/s]
- 46%|####5     | 60762/132723 [00:00&lt;00:00, 78430.57KB/s]
- 52%|#####1    | 68609/132723 [00:00&lt;00:00, 77782.41KB/s]
- 58%|#####7    | 76391/132723 [00:01&lt;00:00, 77682.92KB/s]
- 63%|######3   | 84162/132723 [00:01&lt;00:00, 76956.61KB/s]
- 69%|######9   | 91861/132723 [00:01&lt;00:00, 76477.86KB/s]
- 75%|#######4  | 99511/132723 [00:01&lt;00:00, 76299.51KB/s]
- 81%|########  | 107143/132723 [00:01&lt;00:00, 76208.23KB/s]
- 86%|########6 | 114794/132723 [00:01&lt;00:00, 76284.47KB/s]
- 92%|#########2| 122426/132723 [00:01&lt;00:00, 76257.85KB/s]
- 98%|#########8| 130171/132723 [00:01&lt;00:00, 76613.18KB/s]
-100%|##########| 132723/132723 [00:01&lt;00:00, 75958.90KB/s]
+  4%|4         | 5880/132723 [00:00&lt;00:02, 58791.79KB/s]
+ 11%|#         | 14003/132723 [00:00&lt;00:01, 71987.49KB/s]
+ 17%|#6        | 22075/132723 [00:00&lt;00:01, 75972.62KB/s]
+ 23%|##2       | 30136/132723 [00:00&lt;00:01, 77800.13KB/s]
+ 29%|##8       | 38351/132723 [00:00&lt;00:01, 79365.25KB/s]
+ 35%|###5      | 46586/132723 [00:00&lt;00:01, 80377.94KB/s]
+ 41%|####1     | 54807/132723 [00:00&lt;00:00, 80974.29KB/s]
+ 47%|####7     | 62905/132723 [00:00&lt;00:00, 80499.15KB/s]
+ 53%|#####3    | 70956/132723 [00:00&lt;00:00, 80138.98KB/s]
+ 60%|#####9    | 78973/132723 [00:01&lt;00:00, 80145.06KB/s]
+ 66%|######5   | 86988/132723 [00:01&lt;00:00, 79612.47KB/s]
+ 72%|#######1  | 94951/132723 [00:01&lt;00:00, 79317.07KB/s]
+ 78%|#######7  | 102884/132723 [00:01&lt;00:00, 79207.81KB/s]
+ 83%|########3 | 110806/132723 [00:01&lt;00:00, 79166.01KB/s]
+ 89%|########9 | 118761/132723 [00:01&lt;00:00, 79277.74KB/s]
+ 95%|#########5| 126731/132723 [00:01&lt;00:00, 79402.43KB/s]
+100%|##########| 132723/132723 [00:01&lt;00:00, 78885.78KB/s]
 </pre></div>
 </div>
 <p>Create TVM runtime and do inference
@@ -496,7 +495,7 @@ Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from h
 <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  36.725 seconds)</p>
+<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  29.597 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/sg_execution_times.html b/docs/how_to/deploy_models/sg_execution_times.html
index 29ddb6d23..5db162d56 100644
--- a/docs/how_to/deploy_models/sg_execution_times.html
+++ b/docs/how_to/deploy_models/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-deploy-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>10:45.635</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
+<p><strong>10:37.340</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 86%" />
@@ -331,31 +331,31 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></td>
-<td><p>03:04.193</p></td>
+<td><p>02:55.583</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></td>
-<td><p>02:36.725</p></td>
+<td><p>02:29.597</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></td>
-<td><p>01:52.888</p></td>
+<td><p>01:51.317</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></td>
-<td><p>01:10.526</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></td>
+<td><p>01:21.420</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></td>
-<td><p>01:07.718</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></td>
+<td><p>01:08.056</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></td>
-<td><p>00:30.666</p></td>
+<td><p>00:29.064</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></td>
-<td><p>00:22.913</p></td>
+<td><p>00:22.297</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></td>
diff --git a/docs/how_to/extend_tvm/bring_your_own_datatypes.html b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
index 6724e8604..231614247 100644
--- a/docs/how_to/extend_tvm/bring_your_own_datatypes.html
+++ b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
@@ -607,7 +607,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 <span class="n">module</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <span class="n">get_mobilenet</span><span class="p">()</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipd459995e-b6f1-4d10-b361-e0501c591274 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip1a596816-b990-4e41-b512-bf8d7b091a2a from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 </pre></div>
 </div>
 <p>It’s easy to execute MobileNet with native TVM:</p>
diff --git a/docs/how_to/extend_tvm/sg_execution_times.html b/docs/how_to/extend_tvm/sg_execution_times.html
index 6781ff7eb..e13e6a0d4 100644
--- a/docs/how_to/extend_tvm/sg_execution_times.html
+++ b/docs/how_to/extend_tvm/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-extend-tvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:41.777</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
+<p><strong>00:40.193</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -331,19 +331,19 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></td>
-<td><p>00:38.544</p></td>
+<td><p>00:37.083</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></td>
-<td><p>00:02.264</p></td>
+<td><p>00:02.185</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></td>
-<td><p>00:00.961</p></td>
+<td><p>00:00.914</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></td>
-<td><p>00:00.008</p></td>
+<td><p>00:00.010</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/extend_tvm/use_pass_instrument.html b/docs/how_to/extend_tvm/use_pass_instrument.html
index 62002a980..7ad8a1471 100644
--- a/docs/how_to/extend_tvm/use_pass_instrument.html
+++ b/docs/how_to/extend_tvm/use_pass_instrument.html
@@ -507,10 +507,10 @@ profile the execution time of each passes.</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6842us [6842us] (46.48%; 46.48%)
-FoldScaleAxis: 7878us [5us] (53.52%; 53.52%)
-        FoldConstant: 7872us [1610us] (53.48%; 99.93%)
-                InferType: 6262us [6262us] (42.55%; 79.55%)
+InferType: 6613us [6613us] (45.61%; 45.61%)
+FoldScaleAxis: 7886us [5us] (54.39%; 54.39%)
+        FoldConstant: 7881us [1577us] (54.35%; 99.93%)
+                InferType: 6303us [6303us] (43.47%; 79.98%)
 </pre></div>
 </div>
 </div>
@@ -532,10 +532,10 @@ Refer to following sections and <a class="reference internal" href="../../refere
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6421us [6421us] (44.65%; 44.65%)
-FoldScaleAxis: 7959us [5us] (55.35%; 55.35%)
-        FoldConstant: 7954us [1672us] (55.31%; 99.93%)
-                InferType: 6282us [6282us] (43.68%; 78.98%)
+InferType: 6220us [6220us] (44.68%; 44.68%)
+FoldScaleAxis: 7700us [5us] (55.32%; 55.32%)
+        FoldConstant: 7695us [1596us] (55.28%; 99.94%)
+                InferType: 6099us [6099us] (43.81%; 79.26%)
 </pre></div>
 </div>
 <p>Register empty list to clear existing instruments.</p>
diff --git a/docs/how_to/optimize_operators/opt_conv_cuda.html b/docs/how_to/optimize_operators/opt_conv_cuda.html
index 8a19b10d0..efd35842c 100644
--- a/docs/how_to/optimize_operators/opt_conv_cuda.html
+++ b/docs/how_to/optimize_operators/opt_conv_cuda.html
@@ -559,7 +559,7 @@ latency of convolution.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Convolution: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">*</span> <span cl [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 54.188965 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 34.777033 ms
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-optimize-operators-opt-conv-cuda-py">
diff --git a/docs/how_to/optimize_operators/opt_conv_tensorcore.html b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
index 366beaf32..1d99687dd 100644
--- a/docs/how_to/optimize_operators/opt_conv_tensorcore.html
+++ b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
@@ -901,7 +901,7 @@ be able to run on our build server</p>
     <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;conv2d with tensor core: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">* [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 6.942971 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 12.743089 ms
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/optimize_operators/opt_gemm.html b/docs/how_to/optimize_operators/opt_gemm.html
index d77d07da3..c0762d238 100644
--- a/docs/how_to/optimize_operators/opt_gemm.html
+++ b/docs/how_to/optimize_operators/opt_gemm.html
@@ -456,8 +456,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Baseline: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.019226
-Baseline: 3.450500
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.018402
+Baseline: 3.209169
 </pre></div>
 </div>
 <p>In TVM, we can always inspect lower level IR to debug or optimize our schedule.
@@ -517,7 +517,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt1: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.309829
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.290653
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -584,7 +584,7 @@ vastly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt2: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.346741
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.331112
 </pre></div>
 </div>
 <p>Here is the generated IR after vectorization.</p>
@@ -645,7 +645,7 @@ the access pattern for A matrix is more cache friendly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt3: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.121748
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.117239
 </pre></div>
 </div>
 <p>Here is the generated IR after loop permutation.</p>
@@ -728,7 +728,7 @@ flattening.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt4: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.110637
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.111911
 </pre></div>
 </div>
 <p>Here is the generated IR after array packing.</p>
@@ -814,7 +814,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt5: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.111510
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.111434
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -904,7 +904,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt6: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">opt6_time</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.145426
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.144853
 </pre></div>
 </div>
 <p>Here is the generated IR after parallelization.</p>
diff --git a/docs/how_to/optimize_operators/sg_execution_times.html b/docs/how_to/optimize_operators/sg_execution_times.html
index d48b4a5d9..87ddaa121 100644
--- a/docs/how_to/optimize_operators/sg_execution_times.html
+++ b/docs/how_to/optimize_operators/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-optimize-operators-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:34.997</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
+<p><strong>00:33.786</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,15 +331,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></td>
-<td><p>00:32.679</p></td>
+<td><p>00:31.406</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></td>
-<td><p>00:01.271</p></td>
+<td><p>00:01.348</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></td>
-<td><p>00:01.046</p></td>
+<td><p>00:01.032</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
index c938ed698..9dbb34c54 100644
--- a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
+++ b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autoscheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>06:14.584</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
+<p><strong>05:56.544</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 85%" />
@@ -331,27 +331,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></td>
-<td><p>03:27.225</p></td>
+<td><p>03:14.110</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></td>
-<td><p>01:23.284</p></td>
+<td><p>01:21.555</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></td>
-<td><p>00:46.637</p></td>
+<td><p>00:45.380</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></td>
-<td><p>00:19.310</p></td>
+<td><p>00:18.189</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></td>
-<td><p>00:09.166</p></td>
+<td><p>00:08.738</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></td>
-<td><p>00:08.963</p></td>
+<td><p>00:08.571</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
index 1dad94fea..8954814fb 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
@@ -487,76 +487,222 @@ cooperative fetching, unrolling and operator fusion.</p>
   buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
   preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
   attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 32;
-  allocate(conv2d_nchw: Pointer(local float32), float32, [7]), storage_scope = local;
-  allocate(pad_temp.shared: Pointer(shared float32), float32, [648]), storage_scope = shared;
-  allocate(kernel.shared: Pointer(shared float32), float32, [1152]), storage_scope = shared;
-  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112 {
-    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [7], [], scope=&quot;local&quot;, align=16)[0] = 0f32
+  allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
+  allocate(pad_temp.shared: Pointer(shared float32), float32, [2016]), storage_scope = shared;
+  allocate(kernel.shared: Pointer(shared float32), float32, [1536]), storage_scope = shared;
+  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
+    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope=&quot;local&quot;, align=32)[0] = 0f32
+    conv2d_nchw_1[7] = 0f32
     conv2d_nchw_1[1] = 0f32
+    conv2d_nchw_1[8] = 0f32
     conv2d_nchw_1[2] = 0f32
+    conv2d_nchw_1[9] = 0f32
     conv2d_nchw_1[3] = 0f32
+    conv2d_nchw_1[10] = 0f32
     conv2d_nchw_1[4] = 0f32
+    conv2d_nchw_1[11] = 0f32
     conv2d_nchw_1[5] = 0f32
+    conv2d_nchw_1[12] = 0f32
     conv2d_nchw_1[6] = 0f32
-    for (rc.outer.outer: int32, 0, 64) {
-      let cse_var_2: int32 = (rc.outer.outer*392)
-      let cse_var_1: int32 = (rc.outer.outer*72)
-       {
-        attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        pad_temp.shared_1: Buffer(pad_temp.shared, float32, [648], [], scope=&quot;shared&quot;)[threadIdx.x_1] = @tir.if_then_else(((((9 &lt;= floormod(threadIdx.x_1, 81)) &amp;&amp; (floormod(threadIdx.x_1, 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod(threadIdx.x_1, 9))) &amp;&amp; (floormod(threadIdx.x_1, 9) &lt; 8)), data[((((cse_var_2 + (floordiv(threadIdx.x_1, 81)*49)) + (floordiv(floormod(threadIdx.x_1, 81), 9)*7)) + floormod(threadIdx.x_1, 9)) - 8)], 0f32, dtype=float32)
-        attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        pad_temp.shared_1[(threadIdx.x_1 + 112)] = @tir.if_then_else(((((9 &lt;= floormod((threadIdx.x_1 + 31), 81)) &amp;&amp; (floormod((threadIdx.x_1 + 31), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 4), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 4), 9) &lt; 8)), data[((((cse_var_2 + (floordiv((threadIdx.x_1 + 112), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 31), 81), 9)*7)) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-        attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        pad_temp.shared_1[(threadIdx.x_1 + 224)] = @tir.if_then_else(((((9 &lt;= floormod((threadIdx.x_1 + 62), 81)) &amp;&amp; (floormod((threadIdx.x_1 + 62), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 8), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 8), 9) &lt; 8)), data[((((cse_var_2 + (floordiv((threadIdx.x_1 + 224), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 62), 81), 9)*7)) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-        attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        pad_temp.shared_1[(threadIdx.x_1 + 336)] = @tir.if_then_else(((((9 &lt;= floormod((threadIdx.x_1 + 12), 81)) &amp;&amp; (floormod((threadIdx.x_1 + 12), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 3), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 3), 9) &lt; 8)), data[((((cse_var_2 + (floordiv((threadIdx.x_1 + 336), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 12), 81), 9)*7)) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-        attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        pad_temp.shared_1[(threadIdx.x_1 + 448)] = @tir.if_then_else(((((9 &lt;= floormod((threadIdx.x_1 + 43), 81)) &amp;&amp; (floormod((threadIdx.x_1 + 43), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 7), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 7), 9) &lt; 8)), data[((((cse_var_2 + (floordiv((threadIdx.x_1 + 448), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 43), 81), 9)*7)) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-        attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        if @tir.likely((threadIdx.x_1 &lt; 88), dtype=bool) {
-          pad_temp.shared_1[(threadIdx.x_1 + 560)] = @tir.if_then_else(((((9 &lt;= floormod((threadIdx.x_1 + 74), 81)) &amp;&amp; (floormod((threadIdx.x_1 + 74), 81) &lt; 72)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 2), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 2), 9) &lt; 8)), data[((((cse_var_2 + (floordiv((threadIdx.x_1 + 560), 81)*49)) + (floordiv(floormod((threadIdx.x_1 + 74), 81), 9)*7)) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-        }
-        attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        kernel.shared_1: Buffer(kernel.shared, float32, [1152], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[((((blockIdx.x*73728) + (floordiv(threadIdx.x_2, 72)*4608)) + cse_var_1) + floormod(threadIdx.x_2, 72))]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 112), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 40), 72), 9)*9)) + (floordiv(floormod((threadIdx.x_2 + 4), 9), 3)*3)) + floormod((threadIdx.x_2 + 1), 3))]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 224), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 8), 72), 3)*3)) + floormod((threadIdx.x_2 + 2), 3))]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 336), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 48), 72), 9)*9)) + (floormod((floordiv(threadIdx.x_2, 3) + 1), 3)*3)) + floormod(threadIdx.x_2, 3))]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 448), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 16), 72), 9)*9)) + (floordiv(floormod((threadIdx.x_2 + 7), 9), 3)*3)) + floormod((threadIdx.x_2 + 1), 3))]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        kernel.shared_1[(threadIdx.x_2 + 560)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 560), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 56), 72), 9)*9)) + floormod((threadIdx.x_2 + 2), 9))]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        kernel.shared_1[(threadIdx.x_2 + 672)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 672), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 24), 72), 9)*9)) + (floormod((floordiv(threadIdx.x_2, 3) + 2), 3)*3)) + floormod(threadIdx.x_2, 3))]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        kernel.shared_1[(threadIdx.x_2 + 784)] = kernel[(((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 784), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 64), 72), 9)*9)) + floormod((threadIdx.x_2 + 1), 9))]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 896), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 32), 72), 9)*9)) + (floordiv(floormod((threadIdx.x_2 + 5), 9), 3)*3)) + floormod((threadIdx.x_2 + 2), 3))]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        kernel.shared_1[(threadIdx.x_2 + 1008)] = kernel[(((((blockIdx.x*73728) + (floordiv(threadIdx.x_2, 72)*4608)) + cse_var_1) + floormod(threadIdx.x_2, 72)) + 64512)]
-        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 112;
-        if @tir.likely((threadIdx.x_2 &lt; 32), dtype=bool) {
-          kernel.shared_1[(threadIdx.x_2 + 1120)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1120), 72)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 40), 72), 9)*9)) + (floordiv(floormod((threadIdx.x_2 + 4), 9), 3)*3)) + floormod((threadIdx.x_2 + 1), 3))]
-        }
-        for (rx.outer.inner: int32, 0, 3) {
-          for (rc.inner: int32, 0, 8) {
-            for (ry.inner: int32, 0, 3) {
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7))]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 9)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 18)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 27)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 36)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 45)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((((rc.inner*81) + (ry.inner*9)) + rx.outer.inner) + floormod(threadIdx.x, 7)) + 54)]*kernel.shared_1[((((floordiv(threadIdx.x, 7)*72) + (rc.inner*9)) + (ry.inner*3)) + rx.outer.inner)]))
-            }
+    conv2d_nchw_1[13] = 0f32
+    for (rc.outer.outer: int32, 0, 16) {
+      for (rx.outer.outer: int32, 0, 3) {
+        let cse_var_2: int32 = (rc.outer.outer*1568)
+        let cse_var_1: int32 = (rc.outer.outer*288)
+         {
+          attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
+            pad_temp.shared_1: Buffer(pad_temp.shared, float32, [2016], [], scope=&quot;shared&quot;)[((floordiv((threadIdx.x_1*12), 7)*7) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((7 &lt;= floormod((threadIdx.x_1*12), 63)) &amp;&amp; (floormod((threadIdx.x_1*12), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) &amp;&amp; ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((threadIdx.x_1*12), 6 [...]
+            pad_temp.shared_1[(((floordiv((threadIdx.x_1*4), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 1), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((7 &lt;= floormod(((threadIdx.x_1*12) + 1), 63)) &amp;&amp; (floormod(((threadIdx.x_1*12) + 1), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((t [...]
+            pad_temp.shared_1[(((floordiv((threadIdx.x_1*4), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 2), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((7 &lt;= floormod(((threadIdx.x_1*12) + 2), 63)) &amp;&amp; (floormod(((threadIdx.x_1*12) + 2), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)) &lt; 8)), data[(((((cse_var_2 + (floordiv((t [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 1), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 3), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((7 &lt;= floormod(((threadIdx.x_1*12) + 3), 63)) &amp;&amp; (floormod(((threadIdx.x_1*12) + 3), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)) &lt; 8)), data[(((((cse_var_2 + (floor [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 1), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 4), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((7 &lt;= floormod(((threadIdx.x_1*12) + 4), 63)) &amp;&amp; (floormod(((threadIdx.x_1*12) + 4), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)) &lt; 8)), data[(((((cse_var_2 + (floor [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 1), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 5), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 5), 7))] = @tir.if_then_else(((((7 &lt;= floormod(((threadIdx.x_1*12) + 5), 63)) &amp;&amp; (floormod(((threadIdx.x_1*12) + 5), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 5), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 5), 7)) &lt; 8)), data[(((((cse_var_2 + (floor [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 2), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 6), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 6), 7))] = @tir.if_then_else(((((7 &lt;= floormod(((threadIdx.x_1*12) + 6), 63)) &amp;&amp; (floormod(((threadIdx.x_1*12) + 6), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 6), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 6), 7)) &lt; 8)), data[(((((cse_var_2 + (floor [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 2), 21)*63) + (floormod((floordiv((threadIdx.x_1*12), 7) + 1), 9)*7)) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv((threadIdx.x_1*12), 7) + 1), 9)) &amp;&amp; (floormod((floordiv((threadIdx.x_1*12), 7) + 1), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) &amp;&amp; ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) &lt; 8)), data[(((((cse_var_2 + (f [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 2), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 8), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((7 &lt;= floormod(((threadIdx.x_1*12) + 8), 63)) &amp;&amp; (floormod(((threadIdx.x_1*12) + 8), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)) &lt; 8)), data[(((((cse_var_2 + (floor [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 3), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 9), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((7 &lt;= floormod(((threadIdx.x_1*12) + 9), 63)) &amp;&amp; (floormod(((threadIdx.x_1*12) + 9), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)) &lt; 8)), data[(((((cse_var_2 + (floor [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 3), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 10), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((7 &lt;= floormod(((threadIdx.x_1*12) + 10), 63)) &amp;&amp; (floormod(((threadIdx.x_1*12) + 10), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)) &lt; 8)), data[(((((cse_var_2 + (fl [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 3), 21)*63) + (floordiv(floormod(((threadIdx.x_1*12) + 11), 63), 7)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((7 &lt;= floormod(((threadIdx.x_1*12) + 11), 63)) &amp;&amp; (floormod(((threadIdx.x_1*12) + 11), 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)) &lt; 8)), data[(((((cse_var_2 + (fl [...]
+          }
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*12) + 672), 63)*63) + (floormod((floordiv((threadIdx.x_1*12), 7) + 6), 9)*7)) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv((threadIdx.x_1*12), 7) + 6), 9)) &amp;&amp; (floormod((floordiv((threadIdx.x_1*12), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) &amp;&amp; ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) &lt; 8)), data[(((((cse_var_2 + [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 224), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 6), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 224), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 6), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 6), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 6), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 225), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 5), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 6), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 5), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 5),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 6), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 6), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 6), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 6),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 21)*63) + (floormod((floordiv((threadIdx.x_1*12), 7) + 7), 9)*7)) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv((threadIdx.x_1*12), 7) + 7), 9)) &amp;&amp; (floormod((floordiv((threadIdx.x_1*12), 7) + 7), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) &amp;&amp; ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) &lt; 8)), data[(((((cse_var_2 +  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 226), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 6), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 6), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 6), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3 [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 227), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 6), 9)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 6), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 6), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4 [...]
+          }
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*12) + 1344), 63)*63) + (floormod((floordiv((threadIdx.x_1*12), 7) + 3), 9)*7)) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv((threadIdx.x_1*12), 7) + 3), 9)) &amp;&amp; (floormod((floordiv((threadIdx.x_1*12), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) &amp;&amp; ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) &lt; 8)), data[(((((cse_var_2  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 448), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 3), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 1), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 448), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 3), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 2), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 449), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 3), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 3), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 449), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 3), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 4), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 449), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 5), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 3), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 5), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 5), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 5),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 450), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 6), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 3), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 6), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 6), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 6),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 450), 21)*63) + (floormod((floordiv((threadIdx.x_1*12), 7) + 4), 9)*7)) + floormod((threadIdx.x_1*5), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv((threadIdx.x_1*12), 7) + 4), 9)) &amp;&amp; (floormod((floordiv((threadIdx.x_1*12), 7) + 4), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod((threadIdx.x_1*5), 7)))) &amp;&amp; ((rx.outer.outer + floormod((threadIdx.x_1*5), 7)) &lt; 8)), data[(((((cse_var_2 +  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 450), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 1), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 3), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 8), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 1), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 1),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 451), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 2), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 3), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 9), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 2), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 2),  [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 451), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 3), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 3), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 10), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 3), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 3 [...]
+            pad_temp.shared_1[(((floordiv(((threadIdx.x_1*4) + 451), 21)*63) + (floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 3), 9)*7)) + floormod(((threadIdx.x_1*5) + 4), 7))] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 3), 9)) &amp;&amp; (floormod((floordiv(((threadIdx.x_1*12) + 11), 7) + 3), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(((threadIdx.x_1*5) + 4), 7)))) &amp;&amp; ((rx.outer.outer + floormod(((threadIdx.x_1*5) + 4 [...]
+          }
+          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1: Buffer(kernel.shared, float32, [1536], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[((((blockIdx.x*73728) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 56)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 56), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 56), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 112), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 16), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 168)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 168), 96)*4608)) + cse_var_1) + (floormod((floordiv(threadIdx.x_2, 3) + 24), 32)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 224), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 32), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 280)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 280), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 88), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 336), 96)*4608)) + cse_var_1) + (floormod((floordiv(threadIdx.x_2, 3) + 16), 32)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 392), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 8), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 448), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 64), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 504)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 504), 96)*4608)) + cse_var_1) + ((floordiv(threadIdx.x_2, 3) + 8)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 560)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 560), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 80), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 616)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 616), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 40), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 672)] = kernel[(((((blockIdx.x*73728) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer) + 32256)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 728)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 728), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 56), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 784)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 784), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 16), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 840)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 840), 96)*4608)) + cse_var_1) + (floormod((floordiv(threadIdx.x_2, 3) + 24), 32)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 896), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 32), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 952)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 952), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 88), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 1008)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1008), 96)*4608)) + cse_var_1) + (floormod((floordiv(threadIdx.x_2, 3) + 16), 32)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 1064)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1064), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 8), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 1120)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1120), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 64), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 1176)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1176), 96)*4608)) + cse_var_1) + ((floordiv(threadIdx.x_2, 3) + 8)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 1232)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1232), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 80), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 1288)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1288), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 40), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((blockIdx.x*73728) + cse_var_1) + (threadIdx.x_2*3)) + rx.outer.outer) + 64512)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 1400)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1400), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 56), 96), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 1456)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1456), 96)*4608)) + cse_var_1) + (floordiv(floormod((threadIdx.x_2 + 16), 96), 3)*9)) + (floormod((threadIdx.x_2 + 1), 3)*3)) + rx.outer.outer)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          if @tir.likely((threadIdx.x_2 &lt; 24), dtype=bool) {
+            kernel.shared_1[(threadIdx.x_2 + 1512)] = kernel[((((((blockIdx.x*73728) + (floordiv((threadIdx.x_2 + 1512), 96)*4608)) + cse_var_1) + ((floordiv(threadIdx.x_2, 3) + 24)*9)) + (floormod(threadIdx.x_2, 3)*3)) + rx.outer.outer)]
+          }
+          for (rc.outer.inner: int32, 0, 16) {
+            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7))]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 63)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 63)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 1)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 64)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 64)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 2)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 65)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 65)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 3)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 66)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 66)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 4)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 67)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 67)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 5)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 68)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 68)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 6)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6))]))
+            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 96)]))
+            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 69)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 3)]))
+            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 69)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 99)]))
+            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 70)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 70)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 71)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 71)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 9)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 9)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 72)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 72)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 73)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 73)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 74)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 74)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 12)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 75)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 75)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 1)]))
+            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 13)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 97)]))
+            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 76)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 4)]))
+            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 76)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 100)]))
+            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 14)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 77)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 77)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 15)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 78)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 78)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 16)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 79)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 79)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 17)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 17)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 80)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 80)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 18)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 18)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 81)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 81)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 82)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
+            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 2)]))
+            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 98)]))
+            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 5)]))
+            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*126) + (floormod(threadIdx.x, 7)*7)) + 83)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*192) + (rc.outer.inner*6)) + 101)]))
           }
         }
       }
     }
-    for (i2.inner: int32, 0, 7) {
-      compute[((((blockIdx.x*784) + (floordiv(threadIdx.x, 7)*49)) + (i2.inner*7)) + floormod(threadIdx.x, 7))] = max((conv2d_nchw_1[i2.inner] + bias[((blockIdx.x*16) + floordiv(threadIdx.x, 7))]), 0f32)
+    for (i1.inner: int32, 0, 2) {
+      for (i3.inner: int32, 0, 7) {
+        compute[(((((blockIdx.x*784) + (floordiv(threadIdx.x, 7)*98)) + (i1.inner*49)) + (floormod(threadIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((blockIdx.x*16) + (floordiv(threadIdx.x, 7)*2)) + i1.inner)]), 0f32)
+      }
     }
   }
 }
@@ -593,7 +739,7 @@ cooperative fetching, unrolling and operator fusion.</p>
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.259 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.401 ms
 </pre></div>
 </div>
 </div>
@@ -622,36 +768,36 @@ conv2d_nchw_nn_o_i, conv2d_nchw_nn_i = s[conv2d_nchw].split(conv2d_nchw_nn, fact
 conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
-conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
+conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=2)
 conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
-conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=16)
+conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
 conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
-conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=7)
+conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
 conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
+conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
 conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
 conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
-conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
+conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
+conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
 conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=8)
-conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=1)
-conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
-conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
+conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=16)
+conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
+conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=3)
 conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
-conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
+conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
 s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2d_nc [...]
 compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
 compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
 compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
-compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=16)
+compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
 compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
-compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=7)
-compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
+compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
 compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
-compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
+compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
 compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
 s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
 s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -671,14 +817,14 @@ s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
 s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
 pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=12)
 s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=112)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
 s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
-s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 16)
+s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 1024)
 s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
 
 CUDA source code:
@@ -696,57 +842,186 @@ CUDA source code:
   #define int64_t long long
   #define uint64_t unsigned long long
 #endif
-extern &quot;C&quot; __global__ void __launch_bounds__(112) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-  float conv2d_nchw[7];
-  __shared__ float pad_temp_shared[648];
-  __shared__ float kernel_shared[1152];
+extern &quot;C&quot; __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+  float conv2d_nchw[14];
+  __shared__ float pad_temp_shared[2016];
+  __shared__ float kernel_shared[1536];
   conv2d_nchw[0] = 0.000000e+00f;
+  conv2d_nchw[7] = 0.000000e+00f;
   conv2d_nchw[1] = 0.000000e+00f;
+  conv2d_nchw[8] = 0.000000e+00f;
   conv2d_nchw[2] = 0.000000e+00f;
+  conv2d_nchw[9] = 0.000000e+00f;
   conv2d_nchw[3] = 0.000000e+00f;
+  conv2d_nchw[10] = 0.000000e+00f;
   conv2d_nchw[4] = 0.000000e+00f;
+  conv2d_nchw[11] = 0.000000e+00f;
   conv2d_nchw[5] = 0.000000e+00f;
+  conv2d_nchw[12] = 0.000000e+00f;
   conv2d_nchw[6] = 0.000000e+00f;
-  for (int rc_outer_outer = 0; rc_outer_outer &lt; 64; ++rc_outer_outer) {
-    __syncthreads();
-    pad_temp_shared[((int)threadIdx.x)] = (((((9 &lt;= (((int)threadIdx.x) % 81)) &amp;&amp; ((((int)threadIdx.x) % 81) &lt; 72)) &amp;&amp; (1 &lt;= (((int)threadIdx.x) % 9))) &amp;&amp; ((((int)threadIdx.x) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 392) + ((((int)threadIdx.x) / 81) * 49)) + (((((int)threadIdx.x) % 81) / 9) * 7)) + (((int)threadIdx.x) % 9)) - 8)] : 0.000000e+00f);
-    pad_temp_shared[(((int)threadIdx.x) + 112)] = (((((9 &lt;= ((((int)threadIdx.x) + 31) % 81)) &amp;&amp; (((((int)threadIdx.x) + 31) % 81) &lt; 72)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 4) % 9))) &amp;&amp; (((((int)threadIdx.x) + 4) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 112) / 81) * 49)) + ((((((int)threadIdx.x) + 31) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-    pad_temp_shared[(((int)threadIdx.x) + 224)] = (((((9 &lt;= ((((int)threadIdx.x) + 62) % 81)) &amp;&amp; (((((int)threadIdx.x) + 62) % 81) &lt; 72)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 8) % 9))) &amp;&amp; (((((int)threadIdx.x) + 8) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 224) / 81) * 49)) + ((((((int)threadIdx.x) + 62) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-    pad_temp_shared[(((int)threadIdx.x) + 336)] = (((((9 &lt;= ((((int)threadIdx.x) + 12) % 81)) &amp;&amp; (((((int)threadIdx.x) + 12) % 81) &lt; 72)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 3) % 9))) &amp;&amp; (((((int)threadIdx.x) + 3) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 336) / 81) * 49)) + ((((((int)threadIdx.x) + 12) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-    pad_temp_shared[(((int)threadIdx.x) + 448)] = (((((9 &lt;= ((((int)threadIdx.x) + 43) % 81)) &amp;&amp; (((((int)threadIdx.x) + 43) % 81) &lt; 72)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 7) % 9))) &amp;&amp; (((((int)threadIdx.x) + 7) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 448) / 81) * 49)) + ((((((int)threadIdx.x) + 43) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-    if (((int)threadIdx.x) &lt; 88) {
-      pad_temp_shared[(((int)threadIdx.x) + 560)] = (((((9 &lt;= ((((int)threadIdx.x) + 74) % 81)) &amp;&amp; (((((int)threadIdx.x) + 74) % 81) &lt; 72)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 2) % 9))) &amp;&amp; (((((int)threadIdx.x) + 2) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 560) / 81) * 49)) + ((((((int)threadIdx.x) + 74) % 81) / 9) * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-    }
-    kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) * 73728) + ((((int)threadIdx.x) / 72) * 4608)) + (rc_outer_outer * 72)) + (((int)threadIdx.x) % 72))];
-    kernel_shared[(((int)threadIdx.x) + 112)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 112) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 40) % 72) / 9) * 9)) + ((((((int)threadIdx.x) + 4) % 9) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-    kernel_shared[(((int)threadIdx.x) + 224)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 224) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 72) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-    kernel_shared[(((int)threadIdx.x) + 336)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 336) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 48) % 72) / 9) * 9)) + ((((((int)threadIdx.x) / 3) + 1) % 3) * 3)) + (((int)threadIdx.x) % 3))];
-    kernel_shared[(((int)threadIdx.x) + 448)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 448) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 72) / 9) * 9)) + ((((((int)threadIdx.x) + 7) % 9) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-    kernel_shared[(((int)threadIdx.x) + 560)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 560) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 56) % 72) / 9) * 9)) + ((((int)threadIdx.x) + 2) % 9))];
-    kernel_shared[(((int)threadIdx.x) + 672)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 672) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 24) % 72) / 9) * 9)) + ((((((int)threadIdx.x) / 3) + 2) % 3) * 3)) + (((int)threadIdx.x) % 3))];
-    kernel_shared[(((int)threadIdx.x) + 784)] = kernel[(((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 784) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 64) % 72) / 9) * 9)) + ((((int)threadIdx.x) + 1) % 9))];
-    kernel_shared[(((int)threadIdx.x) + 896)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 896) / 72) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 32) % 72) / 9) * 9)) + ((((((int)threadIdx.x) + 5) % 9) / 3) * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-    kernel_shared[(((int)threadIdx.x) + 1008)] = kernel[(((((((int)blockIdx.x) * 73728) + ((((int)threadIdx.x) / 72) * 4608)) + (rc_outer_outer * 72)) + (((int)threadIdx.x) % 72)) + 64512)];
-    if (((int)threadIdx.x) &lt; 32) {
-      kernel_shared[(((int)threadIdx.x) + 1120)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1120) / 72) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) + 40) / 9) * 9)) + ((((((int)threadIdx.x) + 4) % 9) / 3) * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-    }
-    __syncthreads();
-    for (int rx_outer_inner = 0; rx_outer_inner &lt; 3; ++rx_outer_inner) {
-      for (int rc_inner = 0; rc_inner &lt; 8; ++rc_inner) {
-        for (int ry_inner = 0; ry_inner &lt; 3; ++ry_inner) {
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 9)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 18)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 27)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 36)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 45)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((((rc_inner * 81) + (ry_inner * 9)) + rx_outer_inner) + (((int)threadIdx.x) % 7)) + 54)] * kernel_shared[(((((((int)threadIdx.x) / 7) * 72) + (rc_inner * 9)) + (ry_inner * 3)) + rx_outer_inner)]));
-        }
+  conv2d_nchw[13] = 0.000000e+00f;
+  for (int rc_outer_outer = 0; rc_outer_outer &lt; 16; ++rc_outer_outer) {
+    for (int rx_outer_outer = 0; rx_outer_outer &lt; 3; ++rx_outer_outer) {
+      __syncthreads();
+      pad_temp_shared[((((((int)threadIdx.x) * 12) / 7) * 7) + ((((int)threadIdx.x) * 5) % 7))] = (((((7 &lt;= ((((int)threadIdx.x) * 12) % 63)) &amp;&amp; (((((int)threadIdx.x) * 12) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) &amp;&amp; ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) * 12) / 63) * 49)) + ((((((int)threadIdx.x) * 12) % 63) / 7) * 7)) + rx_outer_outer) + (( [...]
+      pad_temp_shared[(((((((int)threadIdx.x) * 4) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 1) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((7 &lt;= (((((int)threadIdx.x) * 12) + 1) % 63)) &amp;&amp; ((((((int)threadIdx.x) * 12) + 1) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) * 4 [...]
+      pad_temp_shared[(((((((int)threadIdx.x) * 4) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 2) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((7 &lt;= (((((int)threadIdx.x) * 12) + 2) % 63)) &amp;&amp; ((((((int)threadIdx.x) * 12) + 2) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + (((((int)threadIdx.x) * 4 [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 1) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 3) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((7 &lt;= (((((int)threadIdx.x) * 12) + 3) % 63)) &amp;&amp; ((((((int)threadIdx.x) * 12) + 3) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 1) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 4) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((7 &lt;= (((((int)threadIdx.x) * 12) + 4) % 63)) &amp;&amp; ((((((int)threadIdx.x) * 12) + 4) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 1) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 5) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 5) % 7))] = (((((7 &lt;= (((((int)threadIdx.x) * 12) + 5) % 63)) &amp;&amp; ((((((int)threadIdx.x) * 12) + 5) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 2) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 6) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 6) % 7))] = (((((7 &lt;= (((((int)threadIdx.x) * 12) + 6) % 63)) &amp;&amp; ((((((int)threadIdx.x) * 12) + 6) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 2) / 21) * 63) + (((((((int)threadIdx.x) * 12) / 7) + 1) % 9) * 7)) + ((((int)threadIdx.x) * 5) % 7))] = (((((1 &lt;= ((((((int)threadIdx.x) * 12) / 7) + 1) % 9)) &amp;&amp; (((((((int)threadIdx.x) * 12) / 7) + 1) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) &amp;&amp; ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) + [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 2) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 8) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((7 &lt;= (((((int)threadIdx.x) * 12) + 8) % 63)) &amp;&amp; ((((((int)threadIdx.x) * 12) + 8) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 3) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 9) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((7 &lt;= (((((int)threadIdx.x) * 12) + 9) % 63)) &amp;&amp; ((((((int)threadIdx.x) * 12) + 9) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 3) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 10) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((7 &lt;= (((((int)threadIdx.x) * 12) + 10) % 63)) &amp;&amp; ((((((int)threadIdx.x) * 12) + 10) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)thread [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 3) / 21) * 63) + (((((((int)threadIdx.x) * 12) + 11) % 63) / 7) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((7 &lt;= (((((int)threadIdx.x) * 12) + 11) % 63)) &amp;&amp; ((((((int)threadIdx.x) * 12) + 11) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)thread [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 12) + 672) / 63) * 63) + (((((((int)threadIdx.x) * 12) / 7) + 6) % 9) * 7)) + ((((int)threadIdx.x) * 5) % 7))] = (((((1 &lt;= ((((((int)threadIdx.x) * 12) / 7) + 6) % 9)) &amp;&amp; (((((((int)threadIdx.x) * 12) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) &amp;&amp; ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 1 [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 224) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 1) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 1) / 7) + 6) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 1) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 224) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 2) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 2) / 7) + 6) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 2) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 3) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 3) / 7) + 6) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 3) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 4) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 4) / 7) + 6) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 4) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 225) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 5) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 5) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 5) / 7) + 6) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 5) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 6) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 6) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 6) / 7) + 6) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 6) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 21) * 63) + (((((((int)threadIdx.x) * 12) / 7) + 7) % 9) * 7)) + ((((int)threadIdx.x) * 5) % 7))] = (((((1 &lt;= ((((((int)threadIdx.x) * 12) / 7) + 7) % 9)) &amp;&amp; (((((((int)threadIdx.x) * 12) / 7) + 7) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) &amp;&amp; ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 226) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 8) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 8) / 7) + 6) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 8) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 9) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 9) / 7) + 6) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 9) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 10) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 10) / 7) + 6) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 10) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) &lt; 8)) ? data[((((((rc_outer_oute [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 227) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 11) / 7) + 6) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 11) / 7) + 6) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 11) / 7) + 6) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) &lt; 8)) ? data[((((((rc_outer_oute [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 12) + 1344) / 63) * 63) + (((((((int)threadIdx.x) * 12) / 7) + 3) % 9) * 7)) + ((((int)threadIdx.x) * 5) % 7))] = (((((1 &lt;= ((((((int)threadIdx.x) * 12) / 7) + 3) % 9)) &amp;&amp; (((((((int)threadIdx.x) * 12) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) &amp;&amp; ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) *  [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 448) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 1) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 1) / 7) + 3) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 1) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 448) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 2) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 2) / 7) + 3) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 2) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 449) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 3) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 3) / 7) + 3) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 3) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 449) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 4) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 4) / 7) + 3) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 4) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 449) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 5) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 5) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 5) / 7) + 3) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 5) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 5) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 450) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 6) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 6) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 6) / 7) + 3) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 6) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 6) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 450) / 21) * 63) + (((((((int)threadIdx.x) * 12) / 7) + 4) % 9) * 7)) + ((((int)threadIdx.x) * 5) % 7))] = (((((1 &lt;= ((((((int)threadIdx.x) * 12) / 7) + 4) % 9)) &amp;&amp; (((((((int)threadIdx.x) * 12) / 7) + 4) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)))) &amp;&amp; ((rx_outer_outer + ((((int)threadIdx.x) * 5) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 1568) + ((((((int)threadIdx.x) * 4) [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 450) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 8) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 1) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 8) / 7) + 3) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 8) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 1) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 451) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 9) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 2) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 9) / 7) + 3) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 9) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 2) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 451) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 10) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 3) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 10) / 7) + 3) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 10) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 3) % 7)) &lt; 8)) ? data[((((((rc_outer_oute [...]
+      pad_temp_shared[((((((((int)threadIdx.x) * 4) + 451) / 21) * 63) + ((((((((int)threadIdx.x) * 12) + 11) / 7) + 3) % 9) * 7)) + (((((int)threadIdx.x) * 5) + 4) % 7))] = (((((1 &lt;= (((((((int)threadIdx.x) * 12) + 11) / 7) + 3) % 9)) &amp;&amp; ((((((((int)threadIdx.x) * 12) + 11) / 7) + 3) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)))) &amp;&amp; ((rx_outer_outer + (((((int)threadIdx.x) * 5) + 4) % 7)) &lt; 8)) ? data[((((((rc_outer_oute [...]
+      kernel_shared[((int)threadIdx.x)] = kernel[((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 56)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 56) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 56) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 112)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 112) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 16) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 168)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 168) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) / 3) + 24) &amp; 31) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 224)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 224) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 32) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 280)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 280) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 88) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 336)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 336) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) / 3) + 16) &amp; 31) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 392)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 392) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 8) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 448)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 448) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 64) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 504)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 504) / 96) * 4608)) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 72)];
+      kernel_shared[(((int)threadIdx.x) + 560)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 560) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 80) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 616)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 616) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 40) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 672)] = kernel[(((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 32256)];
+      kernel_shared[(((int)threadIdx.x) + 728)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 728) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 56) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 784)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 784) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 16) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 840)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 840) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) / 3) + 24) &amp; 31) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 896)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 896) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 32) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 952)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 952) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 88) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 1008)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1008) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) / 3) + 16) &amp; 31) * 9)) + ((((int)threadIdx.x) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 1064)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1064) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 8) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 1120)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1120) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 64) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 1176)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1176) / 96) * 4608)) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 72)];
+      kernel_shared[(((int)threadIdx.x) + 1232)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1232) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 80) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 1288)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1288) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 40) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[(((((((int)blockIdx.x) * 73728) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 64512)];
+      kernel_shared[(((int)threadIdx.x) + 1400)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1400) / 96) * 4608)) + (rc_outer_outer * 288)) + ((((((int)threadIdx.x) + 56) % 96) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
+      kernel_shared[(((int)threadIdx.x) + 1456)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1456) / 96) * 4608)) + (rc_outer_outer * 288)) + (((((int)threadIdx.x) + 16) / 3) * 9)) + (((((int)threadIdx.x) + 1) % 3) * 3)) + rx_outer_outer)];
+      if (((int)threadIdx.x) &lt; 24) {
+        kernel_shared[(((int)threadIdx.x) + 1512)] = kernel[((((((((int)blockIdx.x) * 73728) + (((((int)threadIdx.x) + 1512) / 96) * 4608)) + (rc_outer_outer * 288)) + (((int)threadIdx.x) * 3)) + rx_outer_outer) + 216)];
+      }
+      __syncthreads();
+      for (int rc_outer_inner = 0; rc_outer_inner &lt; 16; ++rc_outer_inner) {
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7))] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7))] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 63)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 63)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 1)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 64)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 64)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 2)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 65)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 65)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 3)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 66)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 66)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 4)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 67)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 67)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 5)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 68)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 68)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 6)] * kernel_shared[(((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6))]));
+        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 96)]));
+        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 69)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 3)]));
+        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 69)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 99)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 70)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 70)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 71)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 71)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 9)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 9)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 72)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 72)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 73)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 73)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 74)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 74)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 12)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 75)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 75)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 1)]));
+        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 13)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 97)]));
+        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 76)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 4)]));
+        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 76)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 100)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 14)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 77)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 77)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 15)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 78)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 78)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 16)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 79)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 79)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 17)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 17)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 80)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 80)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 18)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 18)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 81)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 81)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 82)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
+        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 2)]));
+        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 98)]));
+        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 5)]));
+        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 126) + ((((int)threadIdx.x) % 7) * 7)) + 83)] * kernel_shared[((((((int)threadIdx.x) / 7) * 192) + (rc_outer_inner * 6)) + 101)]));
       }
     }
   }
-  for (int i2_inner = 0; i2_inner &lt; 7; ++i2_inner) {
-    compute[((((((int)blockIdx.x) * 784) + ((((int)threadIdx.x) / 7) * 49)) + (i2_inner * 7)) + (((int)threadIdx.x) % 7))] = max((conv2d_nchw[i2_inner] + bias[((((int)blockIdx.x) * 16) + (((int)threadIdx.x) / 7))]), 0.000000e+00f);
+  for (int i1_inner = 0; i1_inner &lt; 2; ++i1_inner) {
+    for (int i3_inner = 0; i3_inner &lt; 7; ++i3_inner) {
+      compute[(((((((int)blockIdx.x) * 784) + ((((int)threadIdx.x) / 7) * 98)) + (i1_inner * 49)) + ((((int)threadIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[(((((int)blockIdx.x) * 16) + ((((int)threadIdx.x) / 7) * 2)) + i1_inner)]), 0.000000e+00f);
+    }
   }
 }
 </pre></div>
@@ -783,7 +1058,7 @@ In the example below we resume the status and do more 5 trials.</p>
 Get devices for measurement successfully!
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  27.225 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  14.110 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e3e540f3b477c0c52d8eb73e674e8ffd/tune_conv2d_layer_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_conv2d_layer_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
index d2386df1c..41f2dfcf4 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
@@ -901,7 +901,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-   9.6846       9.6935       9.7301       9.6302       0.0413
+   9.7934       9.8060       9.8157       9.7584       0.0251
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
index 0cd816a91..ed26c3e10 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
@@ -920,7 +920,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  753.6402     753.3775     754.2426     753.3006      0.4271
+  773.7814     773.5620     774.6151     773.1672      0.6112
 </pre></div>
 </div>
 </div>
@@ -942,7 +942,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
 with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>.</p></li>
 </ol>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  23.284 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  21.555 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-x86-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e416b94ca1090b0897c0f6e0df95b911/tune_network_x86.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_x86.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
index e2f8c63e3..e3dba7f99 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
@@ -620,75 +620,77 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
              placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
              compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
   buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-  preflattened_buffer_map = {placeholder_7: placeholder_15: Buffer(placeholder_12, int32, [4916], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), placeholder_6: placeholder_18: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], [])} {
-  for (i0.outer.i1.outer.fused: int32, 0, 256) &quot;parallel&quot; {
-    allocate(compute_4: Pointer(global float32), float32, [256]), storage_scope = global {
-      for (nb_j.inner: int32, 0, 2) {
-        for (i.inner.init: int32, 0, 8) {
-          let cse_var_1: int32 = ((i.inner.init*32) + (nb_j.inner*16))
-           {
-            compute_5: Buffer(compute_4, float32, [256], [])[cse_var_1] = 0f32
-            compute_5[(cse_var_1 + 1)] = 0f32
-            compute_5[(cse_var_1 + 2)] = 0f32
-            compute_5[(cse_var_1 + 3)] = 0f32
-            compute_5[(cse_var_1 + 4)] = 0f32
-            compute_5[(cse_var_1 + 5)] = 0f32
-            compute_5[(cse_var_1 + 6)] = 0f32
-            compute_5[(cse_var_1 + 7)] = 0f32
-            compute_5[(cse_var_1 + 8)] = 0f32
-            compute_5[(cse_var_1 + 9)] = 0f32
-            compute_5[(cse_var_1 + 10)] = 0f32
-            compute_5[(cse_var_1 + 11)] = 0f32
-            compute_5[(cse_var_1 + 12)] = 0f32
-            compute_5[(cse_var_1 + 13)] = 0f32
-            compute_5[(cse_var_1 + 14)] = 0f32
-            compute_5[(cse_var_1 + 15)] = 0f32
-          }
-        }
-        for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
-          for (i.inner: int32, 0, 8) {
-            let cse_var_21: int32 = (elem_idx*16)
-            let cse_var_20: int32 = ((i.inner*32) + (nb_j.inner*16))
-            let cse_var_19: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
-            let cse_var_18: int32 = ((floordiv(i0.outer.i1.outer.fused, 16)*2048) + (i.inner*256))
-            let cse_var_17: int32 = (cse_var_20 + 9)
-            let cse_var_16: int32 = (cse_var_20 + 8)
-            let cse_var_15: int32 = (cse_var_20 + 7)
-            let cse_var_14: int32 = (cse_var_20 + 6)
-            let cse_var_13: int32 = (cse_var_20 + 5)
-            let cse_var_12: int32 = (cse_var_20 + 4)
-            let cse_var_11: int32 = (cse_var_20 + 3)
-            let cse_var_10: int32 = (cse_var_20 + 2)
-            let cse_var_9: int32 = (cse_var_20 + 15)
-            let cse_var_8: int32 = (cse_var_20 + 14)
-            let cse_var_7: int32 = (cse_var_20 + 13)
-            let cse_var_6: int32 = (cse_var_20 + 12)
-            let cse_var_5: int32 = (cse_var_20 + 11)
-            let cse_var_4: int32 = (cse_var_20 + 10)
-            let cse_var_3: int32 = (cse_var_20 + 1)
+  preflattened_buffer_map = {placeholder_9: placeholder_15: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_6: placeholder_17: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_8: placeholder_18: Buffer(placeholder_13, int32, [33], []), placeholder_7: placeholder_19: Buffer(placeholder_12, int32, [4916], [])} {
+  for (i0.outer.i1.outer.fused: int32, 0, 16) &quot;parallel&quot; {
+    allocate(compute_4: Pointer(global float32), float32, [4096]), storage_scope = global {
+      for (i.outer.inner: int32, 0, 4) {
+        for (nb_j.inner: int32, 0, 2) {
+          for (i.inner.init: int32, 0, 32) {
+            let cse_var_1: int32 = (((i.outer.inner*1024) + (i.inner.init*32)) + (nb_j.inner*16))
              {
-              compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[((placeholder_3[cse_var_19]*16) + cse_var_21)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5: Buffer(compute_4, float32, [4096], [])[cse_var_1] = 0f32
+              compute_5[(cse_var_1 + 1)] = 0f32
+              compute_5[(cse_var_1 + 2)] = 0f32
+              compute_5[(cse_var_1 + 3)] = 0f32
+              compute_5[(cse_var_1 + 4)] = 0f32
+              compute_5[(cse_var_1 + 5)] = 0f32
+              compute_5[(cse_var_1 + 6)] = 0f32
+              compute_5[(cse_var_1 + 7)] = 0f32
+              compute_5[(cse_var_1 + 8)] = 0f32
+              compute_5[(cse_var_1 + 9)] = 0f32
+              compute_5[(cse_var_1 + 10)] = 0f32
+              compute_5[(cse_var_1 + 11)] = 0f32
+              compute_5[(cse_var_1 + 12)] = 0f32
+              compute_5[(cse_var_1 + 13)] = 0f32
+              compute_5[(cse_var_1 + 14)] = 0f32
+              compute_5[(cse_var_1 + 15)] = 0f32
+            }
+          }
+          for (elem_idx: int32, 0, let cse_var_2: int32 = ((i0.outer.i1.outer.fused*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
+            for (i.inner: int32, 0, 32) {
+              let cse_var_21: int32 = (elem_idx*16)
+              let cse_var_20: int32 = ((i0.outer.i1.outer.fused*2) + nb_j.inner)
+              let cse_var_19: int32 = ((i.outer.inner*8192) + (i.inner*256))
+              let cse_var_18: int32 = (((i.outer.inner*1024) + (i.inner*32)) + (nb_j.inner*16))
+              let cse_var_17: int32 = (cse_var_18 + 9)
+              let cse_var_16: int32 = (cse_var_18 + 8)
+              let cse_var_15: int32 = (cse_var_18 + 7)
+              let cse_var_14: int32 = (cse_var_18 + 6)
+              let cse_var_13: int32 = (cse_var_18 + 5)
+              let cse_var_12: int32 = (cse_var_18 + 4)
+              let cse_var_11: int32 = (cse_var_18 + 3)
+              let cse_var_10: int32 = (cse_var_18 + 2)
+              let cse_var_9: int32 = (cse_var_18 + 15)
+              let cse_var_8: int32 = (cse_var_18 + 14)
+              let cse_var_7: int32 = (cse_var_18 + 13)
+              let cse_var_6: int32 = (cse_var_18 + 12)
+              let cse_var_5: int32 = (cse_var_18 + 11)
+              let cse_var_4: int32 = (cse_var_18 + 10)
+              let cse_var_3: int32 = (cse_var_18 + 1)
+               {
+                compute_5[cse_var_18] = (compute_5[cse_var_18] + (placeholder_1[((placeholder_3[cse_var_20]*16) + cse_var_21)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+                compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_19 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
+              }
             }
           }
         }
       }
-      for (i0.inner: int32, 0, 8) {
-        let cse_var_22: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
+      for (i0.inner: int32, 0, 128) {
+        let cse_var_22: int32 = ((i0.inner*512) + (i0.outer.i1.outer.fused*32))
         compute[ramp(cse_var_22, 1, 32)] = max((compute_5[ramp((i0.inner*32), 1, 32)] + placeholder_4[ramp(cse_var_22, 1, 32)]), broadcast(0f32, 32))
       }
     }
@@ -727,7 +729,7 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.927 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.753 ms
 </pre></div>
 </div>
 <div class="admonition note">
diff --git a/docs/how_to/tune_with_autotvm/sg_execution_times.html b/docs/how_to/tune_with_autotvm/sg_execution_times.html
index 78ba70c79..0fb327f76 100644
--- a/docs/how_to/tune_with_autotvm/sg_execution_times.html
+++ b/docs/how_to/tune_with_autotvm/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autotvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:43.953</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
+<p><strong>00:43.715</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -331,22 +331,22 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></td>
-<td><p>00:43.919</p></td>
+<td><p>00:43.679</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_relay_x86.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-x86-py"><span class="std std-ref">Auto-tuning a Convolutional Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_x86.py</span></code>)</p></td>
-<td><p>00:00.019</p></td>
+<td><p>00:00.021</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a Convolutional Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></td>
 <td><p>00:00.005</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="tune_relay_arm.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a Convolutional Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="tune_relay_mobile_gpu.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-mobile-gpu-py"><span class="std std-ref">Auto-tuning a Convolutional Network for Mobile GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_mobile_gpu.py</span></code>)</p></td>
 <td><p>00:00.005</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_mobile_gpu.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-mobile-gpu-py"><span class="std std-ref">Auto-tuning a Convolutional Network for Mobile GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_mobile_gpu.py</span></code>)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_arm.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a Convolutional Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></td>
 <td><p>00:00.005</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
diff --git a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
index a129d11e9..60947f43e 100644
--- a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
+++ b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
@@ -1167,8 +1167,8 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 4, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 1, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2885496
-No: 6   GFLOPS: 110.85/110.85   result: MeasureResult(costs=(0.0020884737708333333,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8389439582824707, timestamp=1658167012.8734102)      [(&#39;tile_f&#39;, [-1, 1, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3754080
-No: 7   GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 6   GFLOPS: 42.37/42.37     result: MeasureResult(costs=(0.005463718842105263,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6390187740325928, timestamp=1658180905.033303)        [(&#39;tile_f&#39;, [-1, 1, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3754080
+No: 7   GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1291,7 +1291,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 16, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 256, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6225319
-No: 8   GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 8   GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1414,7 +1414,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 64]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,943546
-No: 9   GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 9   GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1537,7 +1537,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 16, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 16, 32]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2868708
-No: 10  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 10  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 142, in build
     res = future.result()
   File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 435, in result
@@ -1555,7 +1555,7 @@ No: 10  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
 TimeoutError
 
         [(&#39;tile_f&#39;, [-1, 32, 2, 4]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4691833
-No: 11  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 11  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1678,7 +1678,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 2, 64]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,1042124
-No: 12  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 12  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1801,7 +1801,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 1, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 32, 16]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,10013405
-No: 13  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 13  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1924,7 +1924,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 8, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6732082
-No: 14  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 14  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2047,7 +2047,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 4, 32]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7536735
-No: 15  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 15  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2170,7 +2170,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 128, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,482121
-No: 16  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 16  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2293,7 +2293,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 16]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 32, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2824525
-No: 17  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 17  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2416,7 +2416,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4559286
-No: 18  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 18  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2539,7 +2539,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 32, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 512]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9677544
-No: 19  GFLOPS: 0.00/110.85     result: Traceback (most recent call last):
+No: 19  GFLOPS: 0.00/42.37      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 738, in __call__
     yield remote, remote.load_module(os.path.split(build_result.filename)[1])
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 702, in run_through_rpc
@@ -2627,7 +2627,7 @@ tvm._ffi.base.TVMError: Traceback (most recent call last):
   15: _PyEval_EvalFrameDefault
   14: 0x0000000000537c30
   13: _PyObject_FastCallKeywords
-  12: 0x00007f0c1aea4fa2
+  12: 0x00007f6238feefa2
   11: _ctypes_callproc
   10: ffi_call
   9: ffi_call_unix64
@@ -2692,7 +2692,7 @@ Traceback (most recent call last):
   21: _PyFunction_FastCallKeywords
   20: _PyEval_EvalFrameDefault
   19: _PyFunction_FastCall      [(&#39;tile_f&#39;, [-1, 8, 2, 16]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6390073
-No: 20  GFLOPS: 144.67/144.67   result: MeasureResult(costs=(0.00160021674,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.448404312133789, timestamp=1658167038.7930841)       [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
+No: 20  GFLOPS: 144.06/144.06   result: MeasureResult(costs=(0.00160695119,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.440112590789795, timestamp=1658180931.5978546)       [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
 </pre></div>
 </div>
 <p>Finally we can inspect the best config from log file, check correctness,
@@ -2733,7 +2733,7 @@ and measure running time.</p>
 Best config:
 [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
 Finish loading 20 records
-Time cost of this operator: 0.002015
+Time cost of this operator: 0.002017
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autotvm-tune-conv2d-cuda-py">
diff --git a/docs/how_to/work_with_microtvm/micro_autotune.html b/docs/how_to/work_with_microtvm/micro_autotune.html
index 5e7e6a9cf..9917cd784 100644
--- a/docs/how_to/work_with_microtvm/micro_autotune.html
+++ b/docs/how_to/work_with_microtvm/micro_autotune.html
@@ -578,10 +578,10 @@ the tuned operator.</p>
 ########## Build without Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  311.2     98.724   (1, 2, 10, 10, 3)  2       1        [311.2]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.046     0.966    (1, 6, 10, 10)     1       1        [3.046]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.977     0.31     (1, 1, 10, 10, 3)  1       1        [0.977]
-Total_time                                    -                                             315.222   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  309.7     98.726   (1, 2, 10, 10, 3)  2       1        [309.7]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.017     0.962    (1, 6, 10, 10)     1       1        [3.017]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.98      0.312    (1, 1, 10, 10, 3)  1       1        [0.98]
+Total_time                                    -                                             313.697   -        -                  -       -        -
 </pre></div>
 </div>
 </div>
@@ -634,10 +634,10 @@ Total_time                                    -
 ########## Build with Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  247.6     98.819   (1, 1, 10, 10, 6)  2       1        [247.6]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.987     0.793    (1, 6, 10, 10)     1       1        [1.987]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.972     0.388    (1, 1, 10, 10, 3)  1       1        [0.972]
-Total_time                                    -                                             250.559   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  121.8     97.83    (1, 6, 10, 10, 1)  2       1        [121.8]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.747     1.403    (1, 6, 10, 10)     1       1        [1.747]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.955     0.767    (1, 1, 10, 10, 3)  1       1        [0.955]
+Total_time                                    -                                             124.502   -        -                  -       -        -
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-autotune-py">
diff --git a/docs/how_to/work_with_microtvm/micro_train.html b/docs/how_to/work_with_microtvm/micro_train.html
index 18e6ac7d8..6ac324e3d 100644
--- a/docs/how_to/work_with_microtvm/micro_train.html
+++ b/docs/how_to/work_with_microtvm/micro_train.html
@@ -510,7 +510,7 @@ take about <strong>2 minutes</strong> to download the Stanford Cars, while COCO
 <a href="https://docs.python.org/3/library/shutil.html#shutil.move" title="shutil.move" class="sphx-glr-backref-module-shutil sphx-glr-backref-type-py-function"><span class="n">shutil</span><span class="o">.</span><span class="n">move</span></a><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-typ [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmpvfj7zyx7/images/random&#39;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmpzq69y51r/images/random&#39;
 </pre></div>
 </div>
 </div>
@@ -570,8 +570,8 @@ objects to other stuff? We can display some examples from our datasets using <co
     <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s2">&quot;off&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmpvfj7zyx7/images/target contains 8144 images
-/tmp/tmpvfj7zyx7/images/random contains 5000 images
+<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmpzq69y51r/images/target contains 8144 images
+/tmp/tmpzq69y51r/images/random contains 5000 images
 </pre></div>
 </div>
 </div>
@@ -683,13 +683,13 @@ the time on our validation set).</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Epoch 1/3
-328/328 - 55s - loss: 0.2419 - accuracy: 0.9193 - val_loss: 0.1380 - val_accuracy: 0.9532
+328/328 - 55s - loss: 0.2177 - accuracy: 0.9237 - val_loss: 0.1470 - val_accuracy: 0.9520
 Epoch 2/3
-328/328 - 53s - loss: 0.0997 - accuracy: 0.9630 - val_loss: 0.1219 - val_accuracy: 0.9626
+328/328 - 53s - loss: 0.0957 - accuracy: 0.9647 - val_loss: 0.1194 - val_accuracy: 0.9630
 Epoch 3/3
-328/328 - 53s - loss: 0.0688 - accuracy: 0.9738 - val_loss: 0.1334 - val_accuracy: 0.9569
+328/328 - 52s - loss: 0.0658 - accuracy: 0.9757 - val_loss: 0.1752 - val_accuracy: 0.9426
 
-&lt;keras.callbacks.History object at 0x7f21783f5090&gt;
+&lt;keras.callbacks.History object at 0x7f3384193890&gt;
 </pre></div>
 </div>
 </div>
@@ -951,7 +951,7 @@ as intended.</p>
 <p>From here, we could modify the model to read live images from the camera - we have another
 Arduino tutorial for how to do that <a class="reference external" href="https://github.com/guberti/tvm-arduino-demos/tree/master/examples/person_detection">on GitHub</a>. Alternatively, we could also
 <a class="reference external" href="https://tvm.apache.org/docs/how_to/work_with_microtvm/micro_autotune.html">use TVM’s autotuning capabilities</a> to dramatically improve the model’s performance.</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  25.546 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  48.383 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-train-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/b52cec46baf4f78d6bcd94cbe269c8a6/micro_train.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_train.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/sg_execution_times.html b/docs/how_to/work_with_microtvm/sg_execution_times.html
index 9e4e3c573..997ba4dea 100644
--- a/docs/how_to/work_with_microtvm/sg_execution_times.html
+++ b/docs/how_to/work_with_microtvm/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-microtvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:13.418</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
+<p><strong>05:34.881</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,15 +331,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_train.html#sphx-glr-how-to-work-with-microtvm-micro-train-py"><span class="std std-ref">Training Vision Models for microTVM on Arduino</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_train.py</span></code>)</p></td>
-<td><p>04:25.546</p></td>
+<td><p>04:48.383</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_autotune.html#sphx-glr-how-to-work-with-microtvm-micro-autotune-py"><span class="std std-ref">Autotuning with microTVM</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_autotune.py</span></code>)</p></td>
-<td><p>00:44.502</p></td>
+<td><p>00:43.192</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_tflite.html#sphx-glr-how-to-work-with-microtvm-micro-tflite-py"><span class="std std-ref">microTVM with TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tflite.py</span></code>)</p></td>
-<td><p>00:03.369</p></td>
+<td><p>00:03.305</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_ethosu.html#sphx-glr-how-to-work-with-microtvm-micro-ethosu-py"><span class="std std-ref">Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_ethosu.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_relay/sg_execution_times.html b/docs/how_to/work_with_relay/sg_execution_times.html
index 93748f1f6..730eb856e 100644
--- a/docs/how_to/work_with_relay/sg_execution_times.html
+++ b/docs/how_to/work_with_relay/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-relay-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:11.545</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
+<p><strong>00:12.436</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,11 +331,11 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="using_external_lib.html#sphx-glr-how-to-work-with-relay-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></td>
-<td><p>00:10.023</p></td>
+<td><p>00:10.597</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="build_gcn.html#sphx-glr-how-to-work-with-relay-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></td>
-<td><p>00:01.515</p></td>
+<td><p>00:01.833</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="using_relay_viz.html#sphx-glr-how-to-work-with-relay-using-relay-viz-py"><span class="std std-ref">Use Relay Visualizer to Visualize Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_relay_viz.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/intrin_math.html b/docs/how_to/work_with_schedules/intrin_math.html
index fda62608a..d88db1df9 100644
--- a/docs/how_to/work_with_schedules/intrin_math.html
+++ b/docs/how_to/work_with_schedules/intrin_math.html
@@ -517,7 +517,7 @@ The following example customizes CUDA lowering rule for <code class="code docuti
 <a href="../../reference/api/python/ir.html#tvm.ir.register_intrin_lowering" title="tvm.ir.register_intrin_lowering" class="sphx-glr-backref-module-tvm-ir sphx-glr-backref-type-py-function"><span class="n">register_intrin_lowering</span></a><span class="p">(</span><span class="s2">&quot;tir.exp&quot;</span><span class="p">,</span> <span class="n">target</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">f</span><span class="o">= [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f20f80f7e60&gt;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f32f4985830&gt;
 </pre></div>
 </div>
 <p>Register the rule to TVM with override option to override existing rule.
diff --git a/docs/how_to/work_with_schedules/sg_execution_times.html b/docs/how_to/work_with_schedules/sg_execution_times.html
index 5e0df972b..5a6d9136d 100644
--- a/docs/how_to/work_with_schedules/sg_execution_times.html
+++ b/docs/how_to/work_with_schedules/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-schedules-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:04.162</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
+<p><strong>00:04.303</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,31 +331,31 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="intrin_math.html#sphx-glr-how-to-work-with-schedules-intrin-math-py"><span class="std std-ref">Intrinsics and Math Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">intrin_math.py</span></code>)</p></td>
-<td><p>00:01.919</p></td>
+<td><p>00:01.941</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tensorize.html#sphx-glr-how-to-work-with-schedules-tensorize-py"><span class="std std-ref">Use Tensorize to Leverage Hardware Intrinsics</span></a> (<code class="docutils literal notranslate"><span class="pre">tensorize.py</span></code>)</p></td>
-<td><p>00:00.991</p></td>
+<td><p>00:01.106</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="reduction.html#sphx-glr-how-to-work-with-schedules-reduction-py"><span class="std std-ref">Reduction</span></a> (<code class="docutils literal notranslate"><span class="pre">reduction.py</span></code>)</p></td>
-<td><p>00:00.538</p></td>
+<td><p>00:00.541</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="scan.html#sphx-glr-how-to-work-with-schedules-scan-py"><span class="std std-ref">Scan and Recurrent Kernel</span></a> (<code class="docutils literal notranslate"><span class="pre">scan.py</span></code>)</p></td>
-<td><p>00:00.525</p></td>
+<td><p>00:00.531</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="extern_op.html#sphx-glr-how-to-work-with-schedules-extern-op-py"><span class="std std-ref">External Tensor Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">extern_op.py</span></code>)</p></td>
-<td><p>00:00.103</p></td>
+<td><p>00:00.101</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="schedule_primitives.html#sphx-glr-how-to-work-with-schedules-schedule-primitives-py"><span class="std std-ref">Schedule Primitives in TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">schedule_primitives.py</span></code>)</p></td>
-<td><p>00:00.043</p></td>
+<td><p>00:00.040</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tedd.html#sphx-glr-how-to-work-with-schedules-tedd-py"><span class="std std-ref">Use Tensor Expression Debug Display (TEDD) for Visualization</span></a> (<code class="docutils literal notranslate"><span class="pre">tedd.py</span></code>)</p></td>
-<td><p>00:00.028</p></td>
+<td><p>00:00.029</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tuple_inputs.html#sphx-glr-how-to-work-with-schedules-tuple-inputs-py"><span class="std std-ref">Compute and Reduce with Tuple Inputs</span></a> (<code class="docutils literal notranslate"><span class="pre">tuple_inputs.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/tensorize.html b/docs/how_to/work_with_schedules/tensorize.html
index 7f970e549..afc172819 100644
--- a/docs/how_to/work_with_schedules/tensorize.html
+++ b/docs/how_to/work_with_schedules/tensorize.html
@@ -572,7 +572,7 @@ The importing needs to happen before the tensorized GEMV being executed.</p>
              C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
   buffer_map = {A_1: A, B_1: B, C_1: C}
   preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmp9ffzmr_l/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmp9ffzmr_l/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
+  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmpjlp7kihb/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpjlp7kihb/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
   for (i, 0, 1024) {
     for (j.outer: int32, 0, 32) {
       @tir.call_extern(&quot;gemv_update&quot;, @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/reference/api/doxygen/classes.html b/docs/reference/api/doxygen/classes.html
index 00005f97f..fe4df0e9b 100644
--- a/docs/reference/api/doxygen/classes.html
+++ b/docs/reference/api/doxygen/classes.html
@@ -69,11 +69,11 @@ $(function() {
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DAttrs.html">Conv3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1Iterator.html">Iterator</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1Postproc.html">Postproc</a> (<a class=" [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AccessAnalyzer.html">AccessAnalyzer</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DTransposeAttrs.html">Conv3DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1support_1_1Span_1_1iterator [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AccessAnalyzerNode.html">AccessAnalyzerNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DWinogradAttrs.html">Conv3DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1It [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool1DAttrs.html">AdaptivePool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvGemmWeightTransformAttrs.html">ConvGemmWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1auto__scheduler_1_1AttachMap [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool2DAttrs.html">AdaptivePool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvWinogradWeightTransformAttrs.html">ConvWinogradWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapExpr. [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool1DAttrs.html">AdaptivePool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvGemmWeightTransformAttrs.html">ConvGemmWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1auto__scheduler_1_1AttachMap [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool2DAttrs.html">AdaptivePool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvWinogradWeightTransformAttrs.html">ConvWinogradWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapExpr. [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool3DAttrs.html">AdaptivePool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CorrelationAttrs.html">CorrelationAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapExprNode.html">IterMapExprNode</a> (< [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Add.html">Add</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1CostModel.html">CostModel</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapResult.html">IterMapResult</a> (<a class="el" href="namespacetvm_ [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AddNode.html">AddNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CostModel.html">CostModel</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapResultNode.html">IterMapResultNode</a> (<a class="el"  [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Add.html">Add</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1CostModel.html">CostModel</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapResult.html">IterMapResult</a> (<a class="el" href="namespacetvm_ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AddNode.html">AddNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CostModel.html">CostModel</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapResultNode.html">IterMapResultNode</a> (<a class="el"  [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ADT.html">ADT</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1CostModelNode.html">CostModelNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMark.html">IterMark</a> (<a class="el" href="nam [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ADTObj.html">ADTObj</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CostModelNode.html">CostModelNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMarkNode.html">IterMarkNode</a> (<a cla [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AffineGridAttrs.html">AffineGridAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1CountNode.html">CountNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterSplitExpr.html">Iter [...]
@@ -136,15 +136,15 @@ $(function() {
 </td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MatchBufferRegion.html">MatchBufferRegion</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1RebaseNode.html">RebaseNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1te_1_1TensorDom.html">TensorDom</a> (<a class="el" href="namespacetvm_1_1te.html">tv [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNonDefaultVisitor.html">AttrNonDefaultVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MatchBufferRegionNode.html">MatchBufferRegionNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1RecClosure.html">RecClosure</a> (<a [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrNopEntry.html">AttrNopEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1EinsumAttrs.html">EinsumAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MatchNode.html">MatchNode</a> (<a class="el" href="namespacetvm_1 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNormalVisitor.html">AttrNormalVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFunc.html">EnvFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MatmulAttrs.html">MatmulAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::r [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AttrPattern.html">AttrPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFuncNode.html">EnvFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MatrixSetDiagAttrs.html">MatrixSetDiagAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNormalVisitor.html">AttrNormalVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFunc.html">EnvFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MatmulAttrs.html">MatmulAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::r [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AttrPattern.html">AttrPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFuncNode.html">EnvFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MatrixSetDiagAttrs.html">MatrixSetDiagAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html" [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AttrPatternNode.html">AttrPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EQ.html">EQ</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Max.html">Max</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160; [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1AttrRegistry.html">AttrRegistry</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EQNode.html">EQNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MaxNode.html">MaxNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td  [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1AttrRegistryMap.html">AttrRegistryMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1ErrorBuilder.html">ErrorBuilder</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool1DAttrs.html">MaxPool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160; [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1AttrRegistryMapContainerMap.html">AttrRegistryMapContainerMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ErrorReporter.html">ErrorReporter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html">MaxPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.ht [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1Attrs.html">Attrs</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Evaluate.html">Evaluate</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool3DAttrs.html">MaxPool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#16 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrsNode.html">AttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EvaluateNode.html">EvaluateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCallback.html">MeasureCallback</a> (<a class="el" href="namespacetvm_1_1meta__schedule.ht [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSEqualVisitor.html">AttrsSEqualVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Executable.html">Executable</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureCallback.html">Mea [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1Attrs.html">Attrs</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Evaluate.html">Evaluate</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool3DAttrs.html">MaxPool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#16 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrsNode.html">AttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EvaluateNode.html">EvaluateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureCallback.html">MeasureCallback</a> (<a class="el" href="namespacetvm_1_1auto__scheduler. [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSEqualVisitor.html">AttrsSEqualVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Executable.html">Executable</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCallback.html">Meas [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSHashVisitor.html">AttrsSHashVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Executor.html">Executor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCallbackNode.html">MeasureCallbackNode</a> (<a  [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmt.html">AttrStmt</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExecutorNode.html">ExecutorNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureCallbackNode.html">MeasureCallbackNode</a> (<a class="el" href="n [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmtNode.html">AttrStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExecutorRegEntry.html">ExecutorRegEntry</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCandidate.html">MeasureCandidate</a> (<a class="el [...]
@@ -212,8 +212,8 @@ $(function() {
 <tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1Builder.html">Builder</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GatherAttrs.html">GatherAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_o"></a><table border="0" cellspacing="0" cellpadding="0"><tr><t [...]
 </td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Schedule.html">Schedule</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeDataNode.html">TypeDataNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
 <tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInput.html">BuilderInput</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GatherNDAttrs.html">GatherNDAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Schedule.html">Schedule</a> (<a class="el [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInputNode.html">BuilderInputNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GE.html">GE</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjAllocatorBase.html">ObjAllocatorBase</a> (<a class="el" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderNode.html">BuilderNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFunc.html">GenericFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Object.html">Object</a> (<a class="el" href="namespacetvm_1_1runtime. [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInputNode.html">BuilderInputNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GE.html">GE</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjAllocatorBase.html">ObjAllocatorBase</a> (<a class="el" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderNode.html">BuilderNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFunc.html">GenericFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Object.html">Object</a> (<a class="el" href="namespacetvm_1_1runtime. [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderResult.html">BuilderResult</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFuncNode.html">GenericFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectEqual.html">ObjectEqual</a> (<a class="el" href="n [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderResultNode.html">BuilderResultNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GENode.html">GENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectHash.html">ObjectHash</a> (<a class="el"  [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1BuildResult.html">BuildResult</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GetValidCountsAttrs.html">GetValidCountsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ObjectPath.html">ObjectPath</a> (<a [...]
@@ -231,9 +231,9 @@ $(function() {
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CallLoweredAttrs.html">CallLoweredAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_h"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;h&#160;&#160;</div></td></tr></table>
 </td><td valign="top"><a class="el" href="classtvm_1_1Op.html">Op</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchTask.html">SearchTask</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeNode.html">TypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</ [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpAttrMap.html">OpAttrMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchTaskNode.html">SearchTaskNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html">SimpleObjAllocator::Handler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Operation.html">Operation</a> (<a class="el" hr [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPattern.html">CallPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html">SEqualReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1OperationNode.html">OperationNode</a> (<a class="el" href="namespacetvm_1_1t [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPatternNode.html">CallPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html">SHashReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html">OpImplementation</a> (<a class="el" href="na [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html">SEqualReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Operation.html">Operation</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>) [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPattern.html">CallPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html">SHashReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1OperationNode.html">OperationNode</a> (<a class="el" href="namespacetvm_1_1te. [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPatternNode.html">CallPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html">SimpleObjAllocator::Handler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html">O [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1CanonicalSimplifier.html">CanonicalSimplifier</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDataType_01_4.html">Handler&lt; DLDataType &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImp [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Cast.html">Cast</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDevice_01_4.html">Handler&lt; DLDevice &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpNode.html">OpNode</a> (<a class="el" href="namespace [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CastAttrs.html">CastAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1HardwareParams.html">HardwareParams</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpRegEntry.html">OpRegEntry</a> (<a class="el" hr [...]
@@ -257,8 +257,8 @@ $(function() {
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CompilerAttrs.html">CompilerAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce.html">ImplSEqualReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_0 [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStep.html">ComputeAtStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce_3_01T_00_01true_01_4.html">ImplSEqualReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtv [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html">ComputeAtStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce.html">ImplSHashReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueC [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAG.html">ComputeDAG</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce_3_01T_00_01true_01_4.html">ImplSHashReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1run [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAGNode.html">ComputeDAGNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs.html">ImplVisitAttrs</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAG.html">ComputeDAG</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce_3_01T_00_01true_01_4.html">ImplSHashReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1run [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAGNode.html">ComputeDAGNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs.html">ImplVisitAttrs</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStep.html">ComputeInlineStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs_3_01T_00_01true_01_4.html">ImplVisitAttrs&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="clas [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStepNode.html">ComputeInlineStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IncompleteType.html">IncompleteType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1PadAttrs.html">PadAttrs</a> (<a class="el [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1ComputeOp.html">ComputeOp</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IncompleteTypeNode.html">IncompleteTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1Pass.html">Pass</a> (<a class="el" href="namespacetvm_1_1transform.html">tvm::transform</a>) [...]
diff --git a/docs/reference/api/doxygen/classtvm_1_1TargetKindNode-members.html b/docs/reference/api/doxygen/classtvm_1_1TargetKindNode-members.html
index bb0075db0..71aa19c32 100644
--- a/docs/reference/api/doxygen/classtvm_1_1TargetKindNode-members.html
+++ b/docs/reference/api/doxygen/classtvm_1_1TargetKindNode-members.html
@@ -101,16 +101,17 @@ $(function() {
   <tr><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a0d492efee331e2239a093f4b2017c10f">ref_counter_</a></td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a55549a6c23987890246248682560a03d">RefCounterType</a> typedef</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#ad94d79729ac85aa7c976e23d39066383">RuntimeTypeIndex</a>()</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">static</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html#a7924ccb2fdea6074cca1978c062fb034">TargetInternal</a> class</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></td><td class="entry"><span class="mlabel">friend</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html#a737fff60e8ac6c7549b2f44097fffb48">TargetKindRegEntry</a> class</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></td><td class="entry"><span class="mlabel">friend</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html#a5d4bc475bf6b71b1eca72a24cf036b6f">TVM_DECLARE_FINAL_OBJECT_INFO</a>(TargetKindNode, Object)</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></td><td class="entry"></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a481f01923b14e1851ebd38506e9c66ea">type_index</a>() const</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a4bfc2586cb55f2af47728187b3256255">type_index_</a></td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a817ba6c23b7ee1821c48a75edf255a30">TypeIndex2Key</a>(uint32_t tindex)</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a6ee32a02dd44257da105fbbe5d9c8622">TypeIndex2KeyHash</a>(uint32_t tindex)</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a6841f97e06e6614dd7e82c6dd41b818a">TypeKey2Index</a>(const std::string &amp;key)</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#afd548730a6139d19fe24473ad66026d7">unique</a>() const</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html#a87b1530870f586aa78996f7449e445a6">VisitAttrs</a>(AttrVisitor *v)</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html#a713525ca63d41aacadec9db01d28f59f">target_parser</a></td><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></td><td class="entry"></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html#a7924ccb2fdea6074cca1978c062fb034">TargetInternal</a> class</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></td><td class="entry"><span class="mlabel">friend</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html#a737fff60e8ac6c7549b2f44097fffb48">TargetKindRegEntry</a> class</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></td><td class="entry"><span class="mlabel">friend</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html#a5d4bc475bf6b71b1eca72a24cf036b6f">TVM_DECLARE_FINAL_OBJECT_INFO</a>(TargetKindNode, Object)</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></td><td class="entry"></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a481f01923b14e1851ebd38506e9c66ea">type_index</a>() const</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a4bfc2586cb55f2af47728187b3256255">type_index_</a></td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a817ba6c23b7ee1821c48a75edf255a30">TypeIndex2Key</a>(uint32_t tindex)</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a6ee32a02dd44257da105fbbe5d9c8622">TypeIndex2KeyHash</a>(uint32_t tindex)</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#a6841f97e06e6614dd7e82c6dd41b818a">TypeKey2Index</a>(const std::string &amp;key)</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">static</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html#afd548730a6139d19fe24473ad66026d7">unique</a>() const</td><td class="entry"><a class="el" href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html#a87b1530870f586aa78996f7449e445a6">VisitAttrs</a>(AttrVisitor *v)</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
 </table></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/classtvm_1_1TargetKindNode.html b/docs/reference/api/doxygen/classtvm_1_1TargetKindNode.html
index 916d666d8..691f9eb4c 100644
--- a/docs/reference/api/doxygen/classtvm_1_1TargetKindNode.html
+++ b/docs/reference/api/doxygen/classtvm_1_1TargetKindNode.html
@@ -81,13 +81,13 @@ $(function() {
 <div class="dynheader">
 Inheritance diagram for tvm::TargetKindNode:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1TargetKindNode__inherit__graph.svg" width="290" height="742"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1TargetKindNode__inherit__graph.svg" width="290" height="756"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <div class="dynheader">
 Collaboration diagram for tvm::TargetKindNode:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1TargetKindNode__coll__graph.svg" width="966" height="1346"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1TargetKindNode__coll__graph.svg" width="1220" height="1346"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <table class="memberdecls">
@@ -134,6 +134,9 @@ Public Attributes</h2></td></tr>
 <tr class="memitem:a47f02c66d0f972befdfb29ec592ecba0"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1runtime_1_1PackedFunc.html">PackedFunc</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1TargetKindNode.html#a47f02c66d0f972befdfb29ec592ecba0">preprocessor</a></td></tr>
 <tr class="memdesc:a47f02c66d0f972befdfb29ec592ecba0"><td class="mdescLeft">&#160;</td><td class="mdescRight">Function used to preprocess on target creation.  <a href="#a47f02c66d0f972befdfb29ec592ecba0">More...</a><br /></td></tr>
 <tr class="separator:a47f02c66d0f972befdfb29ec592ecba0"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a713525ca63d41aacadec9db01d28f59f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3">FTVMTargetParser</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1TargetKindNode.html#a713525ca63d41aacadec9db01d28f59f">target_parser</a></td></tr>
+<tr class="memdesc:a713525ca63d41aacadec9db01d28f59f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Function used to parse a JSON target during creation.  <a href="#a713525ca63d41aacadec9db01d28f59f">More...</a><br /></td></tr>
+<tr class="separator:a713525ca63d41aacadec9db01d28f59f"><td class="memSeparator" colspan="2">&#160;</td></tr>
 </table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-attribs"></a>
 Static Public Attributes</h2></td></tr>
@@ -476,6 +479,22 @@ template&lt;typename , typename , typename &gt; </div>
 
 <p>Function used to preprocess on target creation. </p>
 
+</div>
+</div>
+<a id="a713525ca63d41aacadec9db01d28f59f"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a713525ca63d41aacadec9db01d28f59f">&#9670;&nbsp;</a></span>target_parser</h2>
+
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname"><a class="el" href="namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3">FTVMTargetParser</a> tvm::TargetKindNode::target_parser</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>Function used to parse a JSON target during creation. </p>
+
 </div>
 </div>
 <hr/>The documentation for this class was generated from the following file:<ul>
diff --git a/docs/reference/api/doxygen/classtvm_1_1TargetKindNode__coll__graph.svg b/docs/reference/api/doxygen/classtvm_1_1TargetKindNode__coll__graph.svg
index 08715d17f..32c9e53b4 100644
--- a/docs/reference/api/doxygen/classtvm_1_1TargetKindNode__coll__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1TargetKindNode__coll__graph.svg
@@ -4,22 +4,22 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: tvm::TargetKindNode Pages: 1 -->
-<svg width="724pt" height="1009pt"
- viewBox="0.00 0.00 723.50 1009.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="915pt" height="1009pt"
+ viewBox="0.00 0.00 915.00 1009.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1005)">
 <title>tvm::TargetKindNode</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-1005 719.5,-1005 719.5,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-1005 911,-1005 911,4 -4,4"/>
 <!-- Node2 -->
 <g id="node1" class="node">
 <title>Node2</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="325,-.5 325,-79.5 534,-79.5 534,-.5 325,-.5"/>
-<text text-anchor="middle" x="429.5" y="-67.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TargetKindNode</text>
-<polyline fill="none" stroke="#000000" points="325,-60.5 534,-60.5 "/>
-<text text-anchor="start" x="333" y="-48.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ device_type</text>
-<text text-anchor="start" x="333" y="-37.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_key</text>
-<polyline fill="none" stroke="#000000" points="325,-30.5 534,-30.5 "/>
-<text text-anchor="start" x="333" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ VisitAttrs()</text>
-<text text-anchor="start" x="333" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ TVM_DECLARE_FINAL_OBJECT_INFO()</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="408,-.5 408,-79.5 617,-79.5 617,-.5 408,-.5"/>
+<text text-anchor="middle" x="512.5" y="-67.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TargetKindNode</text>
+<polyline fill="none" stroke="#000000" points="408,-60.5 617,-60.5 "/>
+<text text-anchor="start" x="416" y="-48.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ device_type</text>
+<text text-anchor="start" x="416" y="-37.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_key</text>
+<polyline fill="none" stroke="#000000" points="408,-30.5 617,-30.5 "/>
+<text text-anchor="start" x="416" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ VisitAttrs()</text>
+<text text-anchor="start" x="416" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ TVM_DECLARE_FINAL_OBJECT_INFO()</text>
 </g>
 <!-- Node3 -->
 <g id="node2" class="node">
@@ -67,8 +67,8 @@
 <!-- Node3&#45;&gt;Node2 -->
 <g id="edge1" class="edge">
 <title>Node3&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M190.0409,-195.3388C212.3548,-171.0251 237.0318,-146.9421 262.5,-127 285.5369,-108.9616 312.8779,-92.9023 338.6976,-79.643"/>
-<polygon fill="none" stroke="#191970" points="187.3198,-193.1292 183.1935,-202.8875 192.5045,-197.8323 187.3198,-193.1292"/>
+<path fill="none" stroke="#191970" d="M190.1005,-190.0458C212.0145,-166.7858 236.5231,-144.4536 262.5,-127 306.1764,-97.6542 360.7945,-77.1551 407.8294,-63.4596"/>
+<polygon fill="none" stroke="#191970" points="187.307,-187.911 183.0782,-197.6253 192.4419,-192.6685 187.307,-187.911"/>
 </g>
 <!-- Node3&#45;&gt;Node3 -->
 <g id="edge2" class="edge">
@@ -100,9 +100,9 @@
 <!-- Node4&#45;&gt;Node2 -->
 <g id="edge3" class="edge">
 <title>Node4&#45;&gt;Node2</title>
-<path fill="none" stroke="#404040" d="M358.684,-253.7771C366.6762,-208.7969 379.639,-148.7681 398.5,-98 399.4207,-95.5216 400.4286,-93.0171 401.5001,-90.5114"/>
-<polygon fill="none" stroke="#404040" points="401.509,-90.4921 400.3896,-83.3684 406.5347,-79.5952 407.6542,-86.7189 401.509,-90.4921"/>
-<text text-anchor="middle" x="436" y="-101" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> +preprocessor</text>
+<path fill="none" stroke="#404040" d="M363.3257,-253.8112C376.152,-206.7374 398.2948,-144.2303 433.5,-98 436.0513,-94.6497 438.8477,-91.4051 441.818,-88.277"/>
+<polygon fill="none" stroke="#404040" points="441.971,-88.1288 443.4973,-81.081 450.5899,-79.7792 449.0637,-86.827 441.971,-88.1288"/>
+<text text-anchor="middle" x="471" y="-101" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> +preprocessor</text>
 </g>
 <!-- Node5 -->
 <g id="node4" class="node">
@@ -135,13 +135,13 @@
 <!-- Node5&#45;&gt;Node4 -->
 <g id="edge4" class="edge">
 <title>Node5&#45;&gt;Node4</title>
-<path fill="none" stroke="#191970" d="M449.4048,-543.2245C444.6354,-533.7067 439.9509,-524.2149 435.5,-515 415.3519,-473.2861 394.0497,-425.8474 377.5672,-388.322"/>
-<polygon fill="none" stroke="#191970" points="446.3758,-544.9915 453.9988,-552.3496 452.6282,-541.8437 446.3758,-544.9915"/>
+<path fill="none" stroke="#191970" d="M448.4942,-543.2764C443.6835,-533.7468 438.9678,-524.2382 434.5,-515 414.3699,-473.376 393.3083,-425.939 377.0665,-388.392"/>
+<polygon fill="none" stroke="#191970" points="445.4836,-545.0782 453.1306,-552.4113 451.7256,-541.91 445.4836,-545.0782"/>
 </g>
-<!-- Node7 -->
-<g id="node6" class="node">
-<title>Node7</title>
-<g id="a_node6"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="{tvm::runtime::Array\l\&lt; tvm::runtime::String \&gt;\n||+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ operator=()\l+ operator=()\land 24 more...\l}">
+<!-- Node8 -->
+<g id="node7" class="node">
+<title>Node8</title>
+<g id="a_node7"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="{tvm::runtime::Array\l\&lt; tvm::runtime::String \&gt;\n||+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ Array()\l+ operator=()\l+ operator=()\land 24 more...\l}">
 <polygon fill="#ffffff" stroke="#000000" points="444,-232 444,-410 581,-410 581,-232 444,-232"/>
 <text text-anchor="start" x="452" y="-398" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
 <text text-anchor="middle" x="512.5" y="-387" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::runtime::String &gt;</text>
@@ -162,16 +162,16 @@
 </a>
 </g>
 </g>
-<!-- Node5&#45;&gt;Node7 -->
-<g id="edge7" class="edge">
-<title>Node5&#45;&gt;Node7</title>
+<!-- Node5&#45;&gt;Node8 -->
+<g id="edge8" class="edge">
+<title>Node5&#45;&gt;Node8</title>
 <path fill="none" stroke="#191970" d="M512.5,-542.2298C512.5,-498.7434 512.5,-450.5445 512.5,-410.2656"/>
 <polygon fill="none" stroke="#191970" points="509.0001,-542.3 512.5,-552.3001 516.0001,-542.3001 509.0001,-542.3"/>
 </g>
-<!-- Node8 -->
-<g id="node7" class="node">
-<title>Node8</title>
-<g id="a_node7"><a xlink:href="classtvm_1_1runtime_1_1String.html" target="_top" xlink:title="Reference to string objects. ">
+<!-- Node9 -->
+<g id="node8" class="node">
+<title>Node9</title>
+<g id="a_node8"><a xlink:href="classtvm_1_1runtime_1_1String.html" target="_top" xlink:title="Reference to string objects. ">
 <polygon fill="#ffffff" stroke="#000000" points="599.5,-226.5 599.5,-415.5 715.5,-415.5 715.5,-226.5 599.5,-226.5"/>
 <text text-anchor="middle" x="657.5" y="-403.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::String</text>
 <polyline fill="none" stroke="#000000" points="599.5,-396.5 715.5,-396.5 "/>
@@ -193,9 +193,9 @@
 </a>
 </g>
 </g>
-<!-- Node5&#45;&gt;Node8 -->
-<g id="edge9" class="edge">
-<title>Node5&#45;&gt;Node8</title>
+<!-- Node5&#45;&gt;Node9 -->
+<g id="edge10" class="edge">
+<title>Node5&#45;&gt;Node9</title>
 <path fill="none" stroke="#191970" d="M577.5363,-543.3697C582.0935,-533.8516 586.4713,-524.3179 590.5,-515 604.2168,-483.2747 617.1581,-447.7387 627.9421,-415.8345"/>
 <polygon fill="none" stroke="#191970" points="574.3312,-541.9578 573.1177,-552.4829 580.6299,-545.0118 574.3312,-541.9578"/>
 </g>
@@ -230,19 +230,40 @@
 <polygon fill="none" stroke="#404040" points="512.5001,-786.7944 508.5,-780.7944 512.5,-774.7944 516.5,-780.7943 512.5001,-786.7944"/>
 <text text-anchor="middle" x="532" y="-796" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #data_</text>
 </g>
+<!-- Node7 -->
+<g id="node6" class="node">
+<title>Node7</title>
+<g id="a_node6"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="{tvm::runtime::TypedPacked\lFunc\&lt; TargetJSON(TargetJSON)\&gt;\n||}">
+<polygon fill="#ffffff" stroke="#000000" points="734,-287 734,-355 907,-355 907,-287 734,-287"/>
+<text text-anchor="start" x="742" y="-343" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
+<text text-anchor="middle" x="820.5" y="-332" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; TargetJSON(TargetJSON)&gt;</text>
+<polyline fill="none" stroke="#000000" points="734,-325 907,-325 "/>
+<text text-anchor="middle" x="820.5" y="-313" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="734,-306 907,-306 "/>
+<text text-anchor="middle" x="820.5" y="-294" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+</a>
+</g>
+</g>
 <!-- Node7&#45;&gt;Node2 -->
 <g id="edge6" class="edge">
 <title>Node7&#45;&gt;Node2</title>
-<path fill="none" stroke="#404040" d="M506.6778,-231.9049C501.5168,-189.927 491.7962,-139.9735 473.5,-98 472.3495,-95.3605 471.0552,-92.7315 469.652,-90.132"/>
-<polygon fill="none" stroke="#404040" points="469.5401,-89.9457 463.0209,-86.8635 463.3586,-79.6603 469.8778,-82.7425 469.5401,-89.9457"/>
-<text text-anchor="middle" x="513" y="-101" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> +default_keys</text>
+<path fill="none" stroke="#404040" d="M811.6559,-286.654C799.0191,-243.9199 771.9149,-171.2027 724.5,-127 697.571,-101.8954 662.4089,-83.5723 628.4848,-70.4083"/>
+<polygon fill="none" stroke="#404040" points="628.2844,-70.3336 621.2647,-71.9843 617.0412,-66.1395 624.0608,-64.4888 628.2844,-70.3336"/>
+<text text-anchor="middle" x="738" y="-101" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> +target_parser</text>
 </g>
 <!-- Node8&#45;&gt;Node2 -->
-<g id="edge8" class="edge">
+<g id="edge7" class="edge">
 <title>Node8&#45;&gt;Node2</title>
-<path fill="none" stroke="#404040" d="M636.3161,-226.1461C625.9877,-192.8524 611.1937,-156.4444 590.5,-127 578.5883,-110.0513 562.3001,-96.0923 544.6374,-84.6997"/>
-<polygon fill="none" stroke="#404040" points="544.3515,-84.5257 537.1465,-84.8221 534.1017,-78.2853 541.3068,-77.9889 544.3515,-84.5257"/>
-<text text-anchor="middle" x="594.5" y="-101" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> +name</text>
+<path fill="none" stroke="#404040" d="M512.5,-231.7037C512.5,-185.5903 512.5,-130.9516 512.5,-91.8275"/>
+<polygon fill="none" stroke="#404040" points="512.5001,-91.7004 508.5,-85.7005 512.5,-79.7004 516.5,-85.7004 512.5001,-91.7004"/>
+<text text-anchor="middle" x="550" y="-101" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> +default_keys</text>
+</g>
+<!-- Node9&#45;&gt;Node2 -->
+<g id="edge9" class="edge">
+<title>Node9&#45;&gt;Node2</title>
+<path fill="none" stroke="#404040" d="M643.6003,-226.3155C634.0197,-184.2098 618.0248,-135.7399 591.5,-98 589.2008,-94.7287 586.658,-91.5692 583.9358,-88.5286"/>
+<polygon fill="none" stroke="#404040" points="583.7227,-88.3126 576.6612,-86.8509 575.2946,-79.7705 582.356,-81.2322 583.7227,-88.3126"/>
+<text text-anchor="middle" x="617.5" y="-101" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> +name</text>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/classtvm_1_1TargetKindNode__inherit__graph.svg b/docs/reference/api/doxygen/classtvm_1_1TargetKindNode__inherit__graph.svg
index cbf384217..66467849a 100644
--- a/docs/reference/api/doxygen/classtvm_1_1TargetKindNode__inherit__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1TargetKindNode__inherit__graph.svg
@@ -4,21 +4,22 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: tvm::TargetKindNode Pages: 1 -->
-<svg width="217pt" height="556pt"
- viewBox="0.00 0.00 217.00 556.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 552)">
+<svg width="217pt" height="567pt"
+ viewBox="0.00 0.00 217.00 567.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 563)">
 <title>tvm::TargetKindNode</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-552 213,-552 213,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-563 213,-563 213,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="0,-.5 0,-112.5 209,-112.5 209,-.5 0,-.5"/>
-<text text-anchor="middle" x="104.5" y="-100.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TargetKindNode</text>
-<polyline fill="none" stroke="#000000" points="0,-93.5 209,-93.5 "/>
-<text text-anchor="start" x="8" y="-81.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ name</text>
-<text text-anchor="start" x="8" y="-70.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ device_type</text>
-<text text-anchor="start" x="8" y="-59.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ default_keys</text>
-<text text-anchor="start" x="8" y="-48.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ preprocessor</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="0,-.5 0,-123.5 209,-123.5 209,-.5 0,-.5"/>
+<text text-anchor="middle" x="104.5" y="-111.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TargetKindNode</text>
+<polyline fill="none" stroke="#000000" points="0,-104.5 209,-104.5 "/>
+<text text-anchor="start" x="8" y="-92.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ name</text>
+<text text-anchor="start" x="8" y="-81.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ device_type</text>
+<text text-anchor="start" x="8" y="-70.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ default_keys</text>
+<text text-anchor="start" x="8" y="-59.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ preprocessor</text>
+<text text-anchor="start" x="8" y="-48.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ target_parser</text>
 <text text-anchor="start" x="8" y="-37.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_key</text>
 <polyline fill="none" stroke="#000000" points="0,-30.5 209,-30.5 "/>
 <text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ VisitAttrs()</text>
@@ -28,51 +29,51 @@
 <g id="node2" class="node">
 <title>Node1</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1runtime_1_1Object.html" target="_top" xlink:title="base class of all object containers. ">
-<polygon fill="#ffffff" stroke="#000000" points="13,-149.5 13,-547.5 196,-547.5 196,-149.5 13,-149.5"/>
-<text text-anchor="middle" x="104.5" y="-535.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Object</text>
-<polyline fill="none" stroke="#000000" points="13,-528.5 196,-528.5 "/>
-<text text-anchor="start" x="21" y="-516.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_key</text>
-<text text-anchor="start" x="21" y="-505.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_final</text>
-<text text-anchor="start" x="21" y="-494.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_child_slots</text>
-<text text-anchor="start" x="21" y="-483.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_child_slots_can</text>
-<text text-anchor="start" x="21" y="-472.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_overflow</text>
-<text text-anchor="start" x="21" y="-461.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_has_method_visit</text>
-<text text-anchor="start" x="21" y="-450.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_attrs</text>
-<text text-anchor="start" x="21" y="-439.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_has_method_sequal</text>
-<text text-anchor="start" x="21" y="-428.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_reduce</text>
-<text text-anchor="start" x="21" y="-417.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_has_method_shash</text>
-<text text-anchor="start" x="21" y="-406.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_reduce</text>
-<text text-anchor="start" x="21" y="-395.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_index</text>
-<text text-anchor="start" x="21" y="-384.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># type_index_</text>
-<text text-anchor="start" x="21" y="-373.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ref_counter_</text>
-<text text-anchor="start" x="21" y="-362.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># deleter_</text>
-<polyline fill="none" stroke="#000000" points="13,-355.5 196,-355.5 "/>
-<text text-anchor="start" x="21" y="-343.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ type_index()</text>
-<text text-anchor="start" x="21" y="-332.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ GetTypeKey()</text>
-<text text-anchor="start" x="21" y="-321.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ GetTypeKeyHash()</text>
-<text text-anchor="start" x="21" y="-310.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ IsInstance()</text>
-<text text-anchor="start" x="21" y="-299.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ unique()</text>
+<polygon fill="#ffffff" stroke="#000000" points="13,-160.5 13,-558.5 196,-558.5 196,-160.5 13,-160.5"/>
+<text text-anchor="middle" x="104.5" y="-546.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Object</text>
+<polyline fill="none" stroke="#000000" points="13,-539.5 196,-539.5 "/>
+<text text-anchor="start" x="21" y="-527.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_key</text>
+<text text-anchor="start" x="21" y="-516.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_final</text>
+<text text-anchor="start" x="21" y="-505.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_child_slots</text>
+<text text-anchor="start" x="21" y="-494.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_child_slots_can</text>
+<text text-anchor="start" x="21" y="-483.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_overflow</text>
+<text text-anchor="start" x="21" y="-472.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_has_method_visit</text>
+<text text-anchor="start" x="21" y="-461.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_attrs</text>
+<text text-anchor="start" x="21" y="-450.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_has_method_sequal</text>
+<text text-anchor="start" x="21" y="-439.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_reduce</text>
+<text text-anchor="start" x="21" y="-428.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_has_method_shash</text>
+<text text-anchor="start" x="21" y="-417.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_reduce</text>
+<text text-anchor="start" x="21" y="-406.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_index</text>
+<text text-anchor="start" x="21" y="-395.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># type_index_</text>
+<text text-anchor="start" x="21" y="-384.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ref_counter_</text>
+<text text-anchor="start" x="21" y="-373.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># deleter_</text>
+<polyline fill="none" stroke="#000000" points="13,-366.5 196,-366.5 "/>
+<text text-anchor="start" x="21" y="-354.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ type_index()</text>
+<text text-anchor="start" x="21" y="-343.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ GetTypeKey()</text>
+<text text-anchor="start" x="21" y="-332.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ GetTypeKeyHash()</text>
+<text text-anchor="start" x="21" y="-321.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ IsInstance()</text>
+<text text-anchor="start" x="21" y="-310.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ unique()</text>
+<text text-anchor="start" x="21" y="-299.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Object()</text>
 <text text-anchor="start" x="21" y="-288.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Object()</text>
 <text text-anchor="start" x="21" y="-277.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Object()</text>
-<text text-anchor="start" x="21" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Object()</text>
+<text text-anchor="start" x="21" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
 <text text-anchor="start" x="21" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
-<text text-anchor="start" x="21" y="-244.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
-<text text-anchor="start" x="21" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ TypeIndex2Key()</text>
-<text text-anchor="start" x="21" y="-222.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ TypeIndex2KeyHash()</text>
-<text text-anchor="start" x="21" y="-211.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ TypeKey2Index()</text>
-<text text-anchor="start" x="21" y="-200.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _GetOrAllocRuntimeTypeIndex()</text>
-<text text-anchor="start" x="21" y="-189.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ RuntimeTypeIndex()</text>
-<text text-anchor="start" x="21" y="-178.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># IncRef()</text>
-<text text-anchor="start" x="21" y="-167.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># DecRef()</text>
-<text text-anchor="start" x="21" y="-156.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># GetOrAllocRuntimeTypeIndex()</text>
+<text text-anchor="start" x="21" y="-244.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ TypeIndex2Key()</text>
+<text text-anchor="start" x="21" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ TypeIndex2KeyHash()</text>
+<text text-anchor="start" x="21" y="-222.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ TypeKey2Index()</text>
+<text text-anchor="start" x="21" y="-211.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _GetOrAllocRuntimeTypeIndex()</text>
+<text text-anchor="start" x="21" y="-200.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ RuntimeTypeIndex()</text>
+<text text-anchor="start" x="21" y="-189.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># IncRef()</text>
+<text text-anchor="start" x="21" y="-178.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># DecRef()</text>
+<text text-anchor="start" x="21" y="-167.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># GetOrAllocRuntimeTypeIndex()</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node0 -->
 <g id="edge1" class="edge">
 <title>Node1&#45;&gt;Node0</title>
-<path fill="none" stroke="#191970" d="M104.5,-139.0179C104.5,-129.6961 104.5,-120.8031 104.5,-112.54"/>
-<polygon fill="none" stroke="#191970" points="101.0001,-139.2918 104.5,-149.2918 108.0001,-139.2919 101.0001,-139.2918"/>
+<path fill="none" stroke="#191970" d="M104.5,-150.0774C104.5,-140.8427 104.5,-131.9846 104.5,-123.6845"/>
+<polygon fill="none" stroke="#191970" points="101.0001,-150.2371 104.5,-160.2371 108.0001,-150.2372 101.0001,-150.2371"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry-members.html b/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry-members.html
index 264193576..aa0db2495 100644
--- a/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry-members.html
+++ b/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry-members.html
@@ -80,7 +80,8 @@ $(function() {
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html#a2995c32e12246e892f7f4cb621a2819c">set_default_keys</a>(std::vector&lt; String &gt; keys)</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html">tvm::TargetKindRegEntry</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html#ae3ce5349493f402b82e755a0a180bd9a">set_device_type</a>(int device_type)</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html">tvm::TargetKindRegEntry</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html#a36f21402bccb03300478d6c85bd05512">set_name</a>()</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html">tvm::TargetKindRegEntry</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html#a75150485a300a03a22d9edad8619cc25">TargetKind</a> class</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html">tvm::TargetKindRegEntry</a></td><td class="entry"><span class="mlabel">friend</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html#a21152c83f61180dcb6293226a98025a8">set_target_parser</a>(FTVMTargetParser parser)</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html">tvm::TargetKindRegEntry</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html#a75150485a300a03a22d9edad8619cc25">TargetKind</a> class</td><td class="entry"><a class="el" href="classtvm_1_1TargetKindRegEntry.html">tvm::TargetKindRegEntry</a></td><td class="entry"><span class="mlabel">friend</span></td></tr>
 </table></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry.html b/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry.html
index d1d28cc78..d721845f6 100644
--- a/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry.html
+++ b/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry.html
@@ -79,7 +79,7 @@ $(function() {
 <div class="dynheader">
 Collaboration diagram for tvm::TargetKindRegEntry:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1TargetKindRegEntry__coll__graph.svg" width="206" height="220"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1TargetKindRegEntry__coll__graph.svg" width="206" height="235"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <table class="memberdecls">
@@ -99,6 +99,9 @@ Public Member Functions</h2></td></tr>
 <tr class="memitem:a00b1eb0ab1927210a6a519baecb3085e"><td class="memTemplItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1TargetKindRegEntry.html">TargetKindRegEntry</a> &amp;&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="classtvm_1_1TargetKindRegEntry.html#a00b1eb0ab1927210a6a519baecb3085e">set_attrs_preprocessor</a> (FLambda f)</td></tr>
 <tr class="memdesc:a00b1eb0ab1927210a6a519baecb3085e"><td class="mdescLeft">&#160;</td><td class="mdescRight">Set the pre-processing function applied upon target creation.  <a href="#a00b1eb0ab1927210a6a519baecb3085e">More...</a><br /></td></tr>
 <tr class="separator:a00b1eb0ab1927210a6a519baecb3085e"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a21152c83f61180dcb6293226a98025a8"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1TargetKindRegEntry.html">TargetKindRegEntry</a> &amp;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1TargetKindRegEntry.html#a21152c83f61180dcb6293226a98025a8">set_target_parser</a> (<a class="el" href="namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3">FTVMTargetParser</a> parser)</td></tr>
+<tr class="memdesc:a21152c83f61180dcb6293226a98025a8"><td class="mdescLeft">&#160;</td><td class="mdescRight">Set the parsing function applied upon target creation.  <a href="#a21152c83f61180dcb6293226a98025a8">More...</a><br /></td></tr>
+<tr class="separator:a21152c83f61180dcb6293226a98025a8"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:accd2e15133cf6e6fe2703f57464eae89"><td class="memTemplParams" colspan="2">template&lt;typename ValueType &gt; </td></tr>
 <tr class="memitem:accd2e15133cf6e6fe2703f57464eae89"><td class="memTemplItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1TargetKindRegEntry.html">TargetKindRegEntry</a> &amp;&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="classtvm_1_1TargetKindRegEntry.html#accd2e15133cf6e6fe2703f57464eae89">add_attr_option</a> (const <a class="el" href="classtvm_1_1runtime_1_1String.html">String</a> &amp;key)</td></tr>
 <tr class="memdesc:accd2e15133cf6e6fe2703f57464eae89"><td class="mdescLeft">&#160;</td><td class="mdescRight">Register a valid configuration option and its ValueType for validation.  <a href="#accd2e15133cf6e6fe2703f57464eae89">More...</a><br /></td></tr>
@@ -518,6 +521,40 @@ template&lt;typename FLambda &gt; </div>
 
 <p>Set name of the <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a> to be the same as registry if it is empty. </p>
 
+</div>
+</div>
+<a id="a21152c83f61180dcb6293226a98025a8"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a21152c83f61180dcb6293226a98025a8">&#9670;&nbsp;</a></span>set_target_parser()</h2>
+
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname"><a class="el" href="classtvm_1_1TargetKindRegEntry.html">TargetKindRegEntry</a> &amp; tvm::TargetKindRegEntry::set_target_parser </td>
+          <td>(</td>
+          <td class="paramtype"><a class="el" href="namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3">FTVMTargetParser</a>&#160;</td>
+          <td class="paramname"><em>parser</em></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">inline</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+<p>Set the parsing function applied upon target creation. </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">parser</td><td>The <a class="el" href="classtvm_1_1Target.html" title="Managed reference class to TargetNode. ">Target</a> parsing function </td></tr>
+  </table>
+  </dd>
+</dl>
+
 </div>
 </div>
 <h2 class="groupheader">Friends And Related Function Documentation</h2>
diff --git a/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry__coll__graph.svg b/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry__coll__graph.svg
index a2ff028d3..c28f1567b 100644
--- a/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry__coll__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1TargetKindRegEntry__coll__graph.svg
@@ -4,23 +4,24 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: tvm::TargetKindRegEntry Pages: 1 -->
-<svg width="154pt" height="165pt"
- viewBox="0.00 0.00 154.00 165.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 161)">
+<svg width="154pt" height="176pt"
+ viewBox="0.00 0.00 154.00 176.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 172)">
 <title>tvm::TargetKindRegEntry</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-161 150,-161 150,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-172 150,-172 150,4 -4,4"/>
 <!-- Node1 -->
 <g id="node1" class="node">
 <title>Node1</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="0,-.5 0,-156.5 146,-156.5 146,-.5 0,-.5"/>
-<text text-anchor="middle" x="73" y="-144.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TargetKindRegEntry</text>
-<polyline fill="none" stroke="#000000" points="0,-137.5 146,-137.5 "/>
-<text text-anchor="middle" x="73" y="-125.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="0,-118.5 146,-118.5 "/>
-<text text-anchor="start" x="8" y="-106.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ set_attr()</text>
-<text text-anchor="start" x="8" y="-95.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ set_device_type()</text>
-<text text-anchor="start" x="8" y="-84.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ set_default_keys()</text>
-<text text-anchor="start" x="8" y="-73.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ set_attrs_preprocessor()</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="0,-.5 0,-167.5 146,-167.5 146,-.5 0,-.5"/>
+<text text-anchor="middle" x="73" y="-155.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TargetKindRegEntry</text>
+<polyline fill="none" stroke="#000000" points="0,-148.5 146,-148.5 "/>
+<text text-anchor="middle" x="73" y="-136.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="0,-129.5 146,-129.5 "/>
+<text text-anchor="start" x="8" y="-117.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ set_attr()</text>
+<text text-anchor="start" x="8" y="-106.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ set_device_type()</text>
+<text text-anchor="start" x="8" y="-95.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ set_default_keys()</text>
+<text text-anchor="start" x="8" y="-84.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ set_attrs_preprocessor()</text>
+<text text-anchor="start" x="8" y="-73.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ set_target_parser()</text>
 <text text-anchor="start" x="8" y="-62.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ add_attr_option()</text>
 <text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ add_attr_option()</text>
 <text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ set_name()</text>
diff --git a/docs/reference/api/doxygen/functions_func_l.html b/docs/reference/api/doxygen/functions_func_l.html
index 862f45acf..81dceb735 100644
--- a/docs/reference/api/doxygen/functions_func_l.html
+++ b/docs/reference/api/doxygen/functions_func_l.html
@@ -191,7 +191,7 @@ $(function() {
 : <a class="el" href="classtvm_1_1IRModuleNode.html#ae078ad8def39579701d144578c787bcf">tvm::IRModuleNode</a>
 </li>
 <li>LookupTypeDef()
-: <a class="el" href="classtvm_1_1IRModuleNode.html#a23f3769fe60b3b06c9d163650ea7caaf">tvm::IRModuleNode</a>
+: <a class="el" href="classtvm_1_1IRModuleNode.html#ae095c1fd87642bd417224668c5b4d910">tvm::IRModuleNode</a>
 </li>
 <li>LoopRV()
 : <a class="el" href="classtvm_1_1tir_1_1LoopRV.html#ad47c4e83701875b84c9efd36ee3dc323">tvm::tir::LoopRV</a>
diff --git a/docs/reference/api/doxygen/functions_func_r.html b/docs/reference/api/doxygen/functions_func_r.html
index 7791d8066..9994ba3cf 100644
--- a/docs/reference/api/doxygen/functions_func_r.html
+++ b/docs/reference/api/doxygen/functions_func_r.html
@@ -282,7 +282,7 @@ $(function() {
 </li>
 <li>Rewrite_()
 : <a class="el" href="classtvm_1_1relay_1_1ExprRewriter.html#a867da2f78446c33e201da79e9e1a0b2e">tvm::relay::ExprRewriter</a>
-, <a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html#a2424d6590fceb087cb1624ab8d3348a1">tvm::relay::MixedModeMutator</a>
+, <a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html#a3b53908f4b8cc3708ca75892e47f0929">tvm::relay::MixedModeMutator</a>
 </li>
 <li>RewriteCooperativeFetch()
 : <a class="el" href="classtvm_1_1meta__schedule_1_1Postproc.html#a8a8e8e047dcdcf89ad9d96eed47c293a">tvm::meta_schedule::Postproc</a>
diff --git a/docs/reference/api/doxygen/functions_func_s.html b/docs/reference/api/doxygen/functions_func_s.html
index 953c95515..578c3e804 100644
--- a/docs/reference/api/doxygen/functions_func_s.html
+++ b/docs/reference/api/doxygen/functions_func_s.html
@@ -357,6 +357,9 @@ $(function() {
 <li>set_support_level()
 : <a class="el" href="classtvm_1_1OpRegEntry.html#ab4f7e0f99c8acf2153e15f7cbb6c3c97">tvm::OpRegEntry</a>
 </li>
+<li>set_target_parser()
+: <a class="el" href="classtvm_1_1TargetKindRegEntry.html#a21152c83f61180dcb6293226a98025a8">tvm::TargetKindRegEntry</a>
+</li>
 <li>set_upper_bound()
 : <a class="el" href="classtvm_1_1detail_1_1AttrDocEntry.html#aec039b071d826ab164c5abe123aefaa3">tvm::detail::AttrDocEntry</a>
 , <a class="el" href="structtvm_1_1detail_1_1AttrInitEntry.html#a69e876dfc10eed9573c3043ea5ef2013">tvm::detail::AttrInitEntry&lt; T &gt;</a>
@@ -547,7 +550,7 @@ $(function() {
 : <a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html#a1bb22d4bb3983b4c3ca87b0daef7f93b">tvm::SHashReducer::Handler</a>
 </li>
 <li>SHashReducer()
-: <a class="el" href="classtvm_1_1SHashReducer.html#af7eb7ccc3e5863fe580f1b8dceac5d88">tvm::SHashReducer</a>
+: <a class="el" href="classtvm_1_1SHashReducer.html#acb4d90cb74f617620d0027649adf8f17">tvm::SHashReducer</a>
 </li>
 <li>ShouldLinkParameters()
 : <a class="el" href="classtvm_1_1IRModuleNode.html#a6bb288d971d067764448a2c715702a8c">tvm::IRModuleNode</a>
@@ -645,7 +648,7 @@ $(function() {
 : <a class="el" href="classtvm_1_1te_1_1Stage.html#a51432f38d9ec4792a2525023179ae604">tvm::te::Stage</a>
 </li>
 <li>SplitStep()
-: <a class="el" href="classtvm_1_1auto__scheduler_1_1SplitStep.html#a64ed86582a56a2645b3e4eb44ecb31af">tvm::auto_scheduler::SplitStep</a>
+: <a class="el" href="classtvm_1_1auto__scheduler_1_1SplitStep.html#a184575a8029d77f7a3bee23d81141df5">tvm::auto_scheduler::SplitStep</a>
 </li>
 <li>Stage()
 : <a class="el" href="classtvm_1_1auto__scheduler_1_1Stage.html#af0643fe8c1298451c9a322f915c48843">tvm::auto_scheduler::Stage</a>
@@ -682,7 +685,7 @@ $(function() {
 : <a class="el" href="classtvm_1_1tir_1_1StmtSRefNode.html#afc61714fbac246f72d02d0729fb9ba2d">tvm::tir::StmtSRefNode</a>
 </li>
 <li>StmtNode()
-: <a class="el" href="classtvm_1_1tir_1_1StmtNode.html#a67693c4e97ae49890ea74605fe1b1f74">tvm::tir::StmtNode</a>
+: <a class="el" href="classtvm_1_1tir_1_1StmtNode.html#a79e21b14d3ab57209577bf4a8f694a87">tvm::tir::StmtNode</a>
 </li>
 <li>StmtSRef()
 : <a class="el" href="classtvm_1_1tir_1_1StmtSRef.html#a31687ace5dc4fe487ffb87d658d86412">tvm::tir::StmtSRef</a>
@@ -721,7 +724,7 @@ $(function() {
 : <a class="el" href="classtvm_1_1runtime_1_1DeviceAPI.html#ac29b9295c432a87658392872c644864f">tvm::runtime::DeviceAPI</a>
 </li>
 <li>String()
-: <a class="el" href="classtvm_1_1runtime_1_1String.html#a02fca36e3ff55cc1e83635b02a11fca3">tvm::runtime::String</a>
+: <a class="el" href="classtvm_1_1runtime_1_1String.html#ac5d930b522e9fef9c07e51819d96d2f3">tvm::runtime::String</a>
 </li>
 <li>StringImm()
 : <a class="el" href="classtvm_1_1tir_1_1StringImm.html#a0f2830290e055f677c5d5dea98aab726">tvm::tir::StringImm</a>
diff --git a/docs/reference/api/doxygen/functions_func_v.html b/docs/reference/api/doxygen/functions_func_v.html
index 30604c7a5..684ce99f4 100644
--- a/docs/reference/api/doxygen/functions_func_v.html
+++ b/docs/reference/api/doxygen/functions_func_v.html
@@ -435,7 +435,7 @@ $(function() {
 <li>VisitType_()
 : <a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html#a05485baebc1e25710714f65b68124f73">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a>
 , <a class="el" href="classtvm_1_1TypeMutator.html#ac694fbe28eb7026d30c5ca5fa2fb4a1a">tvm::TypeMutator</a>
-, <a class="el" href="classtvm_1_1TypeVisitor.html#af92188034706eec6c1ce5c8240f65cc0">tvm::TypeVisitor</a>
+, <a class="el" href="classtvm_1_1TypeVisitor.html#ac8845fbf58c1a1f0ebc23c7ee403aaab">tvm::TypeVisitor</a>
 </li>
 <li>VisitTypeDefault_()
 : <a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html#a91553f9e04c39b3821a70ae4f7b0c597">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a>
@@ -454,7 +454,7 @@ $(function() {
 : <a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFrame.html#a8f8c990ee4fa7cb7472f5440f2ca3bde">tvm::runtime::vm::VMFrame</a>
 </li>
 <li>VMFunction()
-: <a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFunction.html#af9d2bdcf19642c21bc4909b9e9b6196d">tvm::runtime::vm::VMFunction</a>
+: <a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFunction.html#aea763069fe1dd6849ce0d1ec336931e0">tvm::runtime::vm::VMFunction</a>
 </li>
 <li>Void()
 : <a class="el" href="classtvm_1_1runtime_1_1DataType.html#ab8dc0832aff8fd7421884c0fe20a3bfd">tvm::runtime::DataType</a>
diff --git a/docs/reference/api/doxygen/functions_l.html b/docs/reference/api/doxygen/functions_l.html
index def3b88b3..4c351e159 100644
--- a/docs/reference/api/doxygen/functions_l.html
+++ b/docs/reference/api/doxygen/functions_l.html
@@ -262,7 +262,7 @@ $(function() {
 , <a class="el" href="classtvm_1_1meta__schedule_1_1TuneContextNode.html#a4ede62a091db49ae8e67d84cfba1e859">tvm::meta_schedule::TuneContextNode</a>
 </li>
 <li>Lookup()
-: <a class="el" href="classtvm_1_1IRModuleNode.html#a8fb3c0a0e2c0a679753e6f8f45f54860">tvm::IRModuleNode</a>
+: <a class="el" href="classtvm_1_1IRModuleNode.html#a8430de337205d25778704d558f892ecb">tvm::IRModuleNode</a>
 </li>
 <li>LookupHashedValue()
 : <a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html#a74fa7e570590ff3b040178cb50f48ee3">tvm::SHashReducer::Handler</a>
@@ -271,7 +271,7 @@ $(function() {
 : <a class="el" href="classtvm_1_1IRModuleNode.html#ae078ad8def39579701d144578c787bcf">tvm::IRModuleNode</a>
 </li>
 <li>LookupTypeDef()
-: <a class="el" href="classtvm_1_1IRModuleNode.html#ae095c1fd87642bd417224668c5b4d910">tvm::IRModuleNode</a>
+: <a class="el" href="classtvm_1_1IRModuleNode.html#a23f3769fe60b3b06c9d163650ea7caaf">tvm::IRModuleNode</a>
 </li>
 <li>loop_var
 : <a class="el" href="classtvm_1_1tir_1_1ForNode.html#a7dbf66bdcf8ed397321517f0915a0946">tvm::tir::ForNode</a>
diff --git a/docs/reference/api/doxygen/functions_r.html b/docs/reference/api/doxygen/functions_r.html
index a30726841..fe39f88fe 100644
--- a/docs/reference/api/doxygen/functions_r.html
+++ b/docs/reference/api/doxygen/functions_r.html
@@ -460,7 +460,7 @@ $(function() {
 </li>
 <li>Rewrite_()
 : <a class="el" href="classtvm_1_1relay_1_1ExprRewriter.html#a956ed40ff5f64ecef5e6034056184cd7">tvm::relay::ExprRewriter</a>
-, <a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html#aedab19fa2803a80d4148f83c1c4b0814">tvm::relay::MixedModeMutator</a>
+, <a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html#a3b53908f4b8cc3708ca75892e47f0929">tvm::relay::MixedModeMutator</a>
 </li>
 <li>rewrite_once
 : <a class="el" href="classtvm_1_1relay_1_1DFPatternCallbackNode.html#a6e4c091ba92fee08251d29633da9b8b8">tvm::relay::DFPatternCallbackNode</a>
diff --git a/docs/reference/api/doxygen/functions_s.html b/docs/reference/api/doxygen/functions_s.html
index 3f5257647..50eb461d1 100644
--- a/docs/reference/api/doxygen/functions_s.html
+++ b/docs/reference/api/doxygen/functions_s.html
@@ -445,6 +445,9 @@ $(function() {
 <li>set_support_level()
 : <a class="el" href="classtvm_1_1OpRegEntry.html#ab4f7e0f99c8acf2153e15f7cbb6c3c97">tvm::OpRegEntry</a>
 </li>
+<li>set_target_parser()
+: <a class="el" href="classtvm_1_1TargetKindRegEntry.html#a21152c83f61180dcb6293226a98025a8">tvm::TargetKindRegEntry</a>
+</li>
 <li>set_upper_bound()
 : <a class="el" href="classtvm_1_1detail_1_1AttrDocEntry.html#aec039b071d826ab164c5abe123aefaa3">tvm::detail::AttrDocEntry</a>
 , <a class="el" href="structtvm_1_1detail_1_1AttrInitEntry.html#a69e876dfc10eed9573c3043ea5ef2013">tvm::detail::AttrInitEntry&lt; T &gt;</a>
@@ -799,7 +802,7 @@ $(function() {
 , <a class="el" href="classtvm_1_1SpanNode.html#ad573167f93facbfbee19983b08bbba3d">tvm::SpanNode</a>
 </li>
 <li>SourceMap()
-: <a class="el" href="classtvm_1_1parser_1_1SourceMap.html#a43518e78ad2060e9400d893078c48008">tvm::parser::SourceMap</a>
+: <a class="el" href="classtvm_1_1parser_1_1SourceMap.html#afc48463cc0967ab79876178613a5aff2">tvm::parser::SourceMap</a>
 </li>
 <li>space_generator
 : <a class="el" href="classtvm_1_1meta__schedule_1_1TuneContextNode.html#a7bdfdd48530bfe380c5f6c143158a07f">tvm::meta_schedule::TuneContextNode</a>
@@ -821,7 +824,7 @@ $(function() {
 </li>
 <li>Span()
 : <a class="el" href="classtvm_1_1Span.html#a5216631b639e8c802263d87d3fe9e5f6">tvm::Span</a>
-, <a class="el" href="classtvm_1_1support_1_1Span.html#a3c22dd06856e7029e7107adf38eb72f5">tvm::support::Span&lt; T, W &gt;</a>
+, <a class="el" href="classtvm_1_1support_1_1Span.html#a77653730a2542edf93b7c4413a72f3ec">tvm::support::Span&lt; T, W &gt;</a>
 </li>
 <li>span
 : <a class="el" href="classtvm_1_1tir_1_1BufferNode.html#a13fc164e1b65cee741b4895df6316a4a">tvm::tir::BufferNode</a>
@@ -895,7 +898,7 @@ $(function() {
 </li>
 <li>Stage()
 : <a class="el" href="classtvm_1_1auto__scheduler_1_1Stage.html#a39ffbb1b4e189180bc4067e74965f42b">tvm::auto_scheduler::Stage</a>
-, <a class="el" href="classtvm_1_1te_1_1Stage.html#afec82602b9321c489b88632a005335f8">tvm::te::Stage</a>
+, <a class="el" href="classtvm_1_1te_1_1Stage.html#a1ecdc9a000be62c9cc26a96d4c33e36e">tvm::te::Stage</a>
 </li>
 <li>stage_id
 : <a class="el" href="classtvm_1_1auto__scheduler_1_1StepNode.html#afcc7aaf263348f66139307affbfcee09">tvm::auto_scheduler::StepNode</a>
@@ -1069,7 +1072,7 @@ $(function() {
 , <a class="el" href="classtvm_1_1tir_1_1BufferNode.html#ac18ddd10b79a30ae57d3a8283686259d">tvm::tir::BufferNode</a>
 </li>
 <li>String()
-: <a class="el" href="classtvm_1_1runtime_1_1String.html#acf549b3c43142639879e0fc31ea5cd77">tvm::runtime::String</a>
+: <a class="el" href="classtvm_1_1runtime_1_1String.html#a68df7bab89fca339e3918438dd80300d">tvm::runtime::String</a>
 , <a class="el" href="classtvm_1_1runtime_1_1StringObj_1_1FromStd.html#a7fb804f7dc96dd9f705c84095f37f1ca">tvm::runtime::StringObj::FromStd</a>
 , <a class="el" href="classtvm_1_1runtime_1_1StringObj.html#a7fb804f7dc96dd9f705c84095f37f1ca">tvm::runtime::StringObj</a>
 </li>
diff --git a/docs/reference/api/doxygen/functions_t.html b/docs/reference/api/doxygen/functions_t.html
index 7afc3623e..704d0d762 100644
--- a/docs/reference/api/doxygen/functions_t.html
+++ b/docs/reference/api/doxygen/functions_t.html
@@ -81,7 +81,7 @@ $(function() {
 , <a class="el" href="structtvm_1_1runtime_1_1vm_1_1Instruction.html#a46879dbe84105fb621a6167f8d73b223">tvm::runtime::vm::Instruction</a>
 </li>
 <li>Target()
-: <a class="el" href="classtvm_1_1Target.html#a77f3d7cc97d8cfd7172af58b4e784d89">tvm::Target</a>
+: <a class="el" href="classtvm_1_1Target.html#a58a5a1e042e265fe5a6973045226fe1a">tvm::Target</a>
 </li>
 <li>target
 : <a class="el" href="classtvm_1_1VirtualDeviceNode.html#a8b2d427d9e21886ccaeaae5e9cc55aaf">tvm::VirtualDeviceNode</a>
@@ -96,6 +96,9 @@ $(function() {
 <li>target_iter_id
 : <a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html#a5691967a42b989a54cf8c40c1627988e">tvm::auto_scheduler::ComputeAtStepNode</a>
 </li>
+<li>target_parser
+: <a class="el" href="classtvm_1_1TargetKindNode.html#a713525ca63d41aacadec9db01d28f59f">tvm::TargetKindNode</a>
+</li>
 <li>target_shape
 : <a class="el" href="structtvm_1_1relay_1_1AffineGridAttrs.html#a3e8a722c28015e4fa002da324fc6d5b3">tvm::relay::AffineGridAttrs</a>
 </li>
@@ -1320,10 +1323,10 @@ $(function() {
 : <a class="el" href="classtvm_1_1TypeData.html#a0a98fd1095812379d2bd1337db1511c1">tvm::TypeData</a>
 </li>
 <li>TypedEnvFunc()
-: <a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html#a0d72a6fa7263821c14bcd37837998ed9">tvm::TypedEnvFunc&lt; R(Args...)&gt;</a>
+: <a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html#a41a6b9014d0feeb628ca7edfd0d26f0b">tvm::TypedEnvFunc&lt; R(Args...)&gt;</a>
 </li>
 <li>TypedPackedFunc()
-: <a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html#a4abadc6786dd14a3aed6e2b5b342d1d6">tvm::runtime::TypedPackedFunc&lt; R(Args...)&gt;</a>
+: <a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc_3_01R_07Args_8_8_8_08_4.html#a6b346a6d0b601eff5a100c7a207e9c86">tvm::runtime::TypedPackedFunc&lt; R(Args...)&gt;</a>
 </li>
 <li>TypeIndex2Key()
 : <a class="el" href="classtvm_1_1runtime_1_1Object.html#a817ba6c23b7ee1821c48a75edf255a30">tvm::runtime::Object</a>
@@ -1346,7 +1349,7 @@ $(function() {
 : <a class="el" href="classtvm_1_1TypeRelation.html#ac26b1897eab8197ed26606ab81b7403b">tvm::TypeRelation</a>
 </li>
 <li>TypeReporter()
-: <a class="el" href="classtvm_1_1TypeReporter.html#aa3dc38a3c84d324d0b3a9f358460a091">tvm::TypeReporter</a>
+: <a class="el" href="classtvm_1_1TypeReporter.html#a8e7e05a07f9f7ad9bea91f27afac9051">tvm::TypeReporter</a>
 </li>
 <li>types
 : <a class="el" href="classtvm_1_1TupleAffineTypeNode.html#a30c834b7e1cb64467e6587ac16ebb187">tvm::TupleAffineTypeNode</a>
diff --git a/docs/reference/api/doxygen/functions_v.html b/docs/reference/api/doxygen/functions_v.html
index 5bc282ed3..a6024f8c1 100644
--- a/docs/reference/api/doxygen/functions_v.html
+++ b/docs/reference/api/doxygen/functions_v.html
@@ -561,7 +561,7 @@ $(function() {
 </li>
 <li>VisitStmt_()
 : <a class="el" href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8_01args_08_4.html#a5f86aa92770d279f28b47260f879b0bd">tvm::tir::StmtFunctor&lt; R(const Stmt &amp;n, Args... args)&gt;</a>
-, <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#aecd16bf1a6715ea36f6c30e5dc2ceae7">tvm::tir::StmtMutator</a>
+, <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a2ec423a8f109916abf02ac463308f58a">tvm::tir::StmtMutator</a>
 , <a class="el" href="classtvm_1_1tir_1_1StmtVisitor.html#a9f7515a82ddc4a41247a1622563feed6">tvm::tir::StmtVisitor</a>
 </li>
 <li>VisitStmtDefault_()
@@ -576,9 +576,9 @@ $(function() {
 , <a class="el" href="classtvm_1_1TypeMutator.html#a84e824911927d98e20a338eab8b75a45">tvm::TypeMutator</a>
 </li>
 <li>VisitType_()
-: <a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html#a40e3787a169f391bda4852aa7caf33cb">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a>
-, <a class="el" href="classtvm_1_1TypeMutator.html#a8171dc89a947d6224e83e86ce5d06d11">tvm::TypeMutator</a>
-, <a class="el" href="classtvm_1_1TypeVisitor.html#a063b7b1705ffabb92e58093032686e90">tvm::TypeVisitor</a>
+: <a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html#aca50c939b0a6d8ebea33865c26e82729">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a>
+, <a class="el" href="classtvm_1_1TypeMutator.html#a2a78bda75555650a37a80e1e074d562a">tvm::TypeMutator</a>
+, <a class="el" href="classtvm_1_1TypeVisitor.html#af92188034706eec6c1ce5c8240f65cc0">tvm::TypeVisitor</a>
 </li>
 <li>VisitTypeDefault_()
 : <a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html#a91553f9e04c39b3821a70ae4f7b0c597">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a>
diff --git a/docs/reference/api/doxygen/functions_vars_t.html b/docs/reference/api/doxygen/functions_vars_t.html
index 158b18790..7171cba3f 100644
--- a/docs/reference/api/doxygen/functions_vars_t.html
+++ b/docs/reference/api/doxygen/functions_vars_t.html
@@ -87,6 +87,9 @@ $(function() {
 <li>target_iter_id
 : <a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html#a5691967a42b989a54cf8c40c1627988e">tvm::auto_scheduler::ComputeAtStepNode</a>
 </li>
+<li>target_parser
+: <a class="el" href="classtvm_1_1TargetKindNode.html#a713525ca63d41aacadec9db01d28f59f">tvm::TargetKindNode</a>
+</li>
 <li>target_shape
 : <a class="el" href="structtvm_1_1relay_1_1AffineGridAttrs.html#a3e8a722c28015e4fa002da324fc6d5b3">tvm::relay::AffineGridAttrs</a>
 </li>
diff --git a/docs/reference/api/doxygen/hierarchy.html b/docs/reference/api/doxygen/hierarchy.html
index 2f50c36a9..b462e24b9 100644
--- a/docs/reference/api/doxygen/hierarchy.html
+++ b/docs/reference/api/doxygen/hierarchy.html
@@ -145,9 +145,9 @@ This inheritance list is sorted roughly, but not completely, alphabetically:</di
 <tr id="row_58_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1FrameBuffer.html" target="_self">tvm::runtime::micro_rpc::FrameBuffer</a></td><td class="desc"></td></tr>
 <tr id="row_59_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1Framer.html" target="_self">tvm::runtime::micro_rpc::Framer</a></td><td class="desc"></td></tr>
 <tr id="row_60_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1usmp_1_1algo_1_1GreedyBase.html" target="_self">tvm::tir::usmp::algo::GreedyBase</a></td><td class="desc">This is the base class for Greedy Algorithms where the sorting is specialized in the extended classes based on the greedy criteria </td></tr>
-<tr id="row_61_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html" target="_self">tvm::runtime::SimpleObjAllocator::Handler&lt; T &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_62_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html" target="_self">tvm::SEqualReducer::Handler</a></td><td class="desc">Internal handler that defines custom behaviors. </td></tr>
-<tr id="row_63_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html" target="_self">tvm::SHashReducer::Handler</a></td><td class="desc">Internal handler that defines custom behaviors </td></tr>
+<tr id="row_61_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html" target="_self">tvm::SEqualReducer::Handler</a></td><td class="desc">Internal handler that defines custom behaviors. </td></tr>
+<tr id="row_62_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html" target="_self">tvm::SHashReducer::Handler</a></td><td class="desc">Internal handler that defines custom behaviors </td></tr>
+<tr id="row_63_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html" target="_self">tvm::runtime::SimpleObjAllocator::Handler&lt; T &gt;</a></td><td class="desc"></td></tr>
 <tr id="row_64_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDataType_01_4.html" target="_self">dmlc::serializer::Handler&lt; DLDataType &gt;</a></td><td class="desc"></td></tr>
 <tr id="row_65_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDevice_01_4.html" target="_self">dmlc::serializer::Handler&lt; DLDevice &gt;</a></td><td class="desc"></td></tr>
 <tr id="row_66_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce.html" target="_self">tvm::detail::ImplSEqualReduce&lt; T, bool &gt;</a></td><td class="desc"></td></tr>
@@ -1354,46 +1354,47 @@ This inheritance list is sorted roughly, but not completely, alphabetically:</di
 <tr id="row_196_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; RunnerResult()&gt;</a></td><td class="desc"></td></tr>
 <tr id="row_197_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; String()&gt;</a></td><td class="desc"></td></tr>
 <tr id="row_198_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; String(const Array&lt; ObjectRef &gt; &amp;inputs, const Array&lt; ObjectRef &gt; &amp;attrs, const Optional&lt; ObjectRef &gt; &amp;decision, const Array&lt; String &gt; &amp;outputs)&gt;</a></td><td class="desc">< [...]
-<tr id="row_199_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; te::Schedule(const Attrs &amp;attrs, const Array&lt; te::Tensor &gt; &amp;outs, const Target &amp;target)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_200_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void()&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_201_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const Array&lt; MeasureCandidate &gt; &amp;, const Array&lt; RunnerResult &gt; &amp;)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_202_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const Array&lt; tir::Schedule &gt; &amp;, const Optional&lt; Database &gt; &amp;, const Optional&lt; CostModel &gt; &amp;)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_203_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const TaskScheduler &amp;task_scheduler, int task_id, const Array&lt; MeasureCandidate &gt; &amp;measure_candidates, const Array&lt; BuilderResult &gt; &amp;builds, const Array&lt; RunnerResult &gt; &amp;results)&gt;</a></t [...]
-<tr id="row_204_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const TuneContext &amp;)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_205_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const TuneContext &amp;, const Array&lt; MeasureCandidate &gt; &amp;, const Array&lt; RunnerResult &gt; &amp;)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_206_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const TuneContext &amp;, const Array&lt; MeasureCandidate &gt; &amp;, void *p_addr)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_207_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const TuningRecord &amp;)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_208_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(int)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_209_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(size_t, void *)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_210_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(String)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_211_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(tvm::DiagnosticContext ctx)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_212_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; Workload(const IRModule &amp;)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_213_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor.html" target="_self">tvm::TypeFunctor&lt; FType &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_214_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html" target="_self">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_215_"><td class="entry"><span style="width:0px;display:inline-block;">&#160;</span><span id="arr_215_" class="arrow" onclick="toggleFolder('215_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor.html" target="_self">tvm::TypeFunctor&lt; Type(const Type &amp;n)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_215_0_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeMutator.html" target="_self">tvm::TypeMutator</a></td><td class="desc"><a class="el" href="classtvm_1_1TypeMutator.html" title="TypeMutator that mutates expressions. ">TypeMutator</a> that mutates expressions </td></tr>
-<tr id="row_216_" class="even"><td class="entry"><span style="width:0px;display:inline-block;">&#160;</span><span id="arr_216_" class="arrow" onclick="toggleFolder('216_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor.html" target="_self">tvm::TypeFunctor&lt; void(const Type &amp;n)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_216_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVisitor.html" target="_self">tvm::TypeVisitor</a></td><td class="desc">A type visitor that recursively visit types </td></tr>
-<tr id="row_217_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1TypeIndex.html" target="_self">tvm::runtime::TypeIndex</a></td><td class="desc">Namespace for the list of type index </td></tr>
-<tr id="row_218_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName.html" target="_self">tvm::detail::TypeName&lt; T &gt;</a></td><td class="desc">Helper struct to get the type name known to tvm </td></tr>
-<tr id="row_219_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01bool_01_4.html" target="_self">tvm::detail::TypeName&lt; bool &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_220_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01DataType_01_4.html" target="_self">tvm::detail::TypeName&lt; DataType &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_221_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01double_01_4.html" target="_self">tvm::detail::TypeName&lt; double &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_222_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01int_01_4.html" target="_self">tvm::detail::TypeName&lt; int &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_223_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01int64__t_01_4.html" target="_self">tvm::detail::TypeName&lt; int64_t &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_224_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01uint64__t_01_4.html" target="_self">tvm::detail::TypeName&lt; uint64_t &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_225_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01void_01_5_01_4.html" target="_self">tvm::detail::TypeName&lt; void * &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_226_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1Unframer.html" target="_self">tvm::runtime::micro_rpc::Unframer</a></td><td class="desc"></td></tr>
-<tr id="row_227_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1v__info.html" target="_self">tvm::relay::v_info</a></td><td class="desc">A struct to keep info of traversed expr in ExpandDataflow function </td></tr>
-<tr id="row_228_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1Array_1_1ValueConverter.html" target="_self">tvm::runtime::Array&lt; T, typename &gt;::ValueConverter</a></td><td class="desc"></td></tr>
-<tr id="row_229_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1ValueTypeInfoMaker.html" target="_self">tvm::detail::ValueTypeInfoMaker&lt; ValueType, IsArray, IsMap &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_230_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDeviceCache.html" target="_self">tvm::VirtualDeviceCache</a></td><td class="desc">A cache of <code>VirtualDevices</code>. This can be used: </td></tr>
-<tr id="row_231_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFrame.html" target="_self">tvm::runtime::vm::VMFrame</a></td><td class="desc">A representation of a stack frame </td></tr>
-<tr id="row_232_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFunction.html" target="_self">tvm::runtime::vm::VMFunction</a></td><td class="desc">A representation of a Relay function in the VM </td></tr>
-<tr id="row_233_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1With.html" target="_self">tvm::With&lt; ContextType &gt;</a></td><td class="desc">RAII wrapper function to enter and exit a context object similar to python's with syntax </td></tr>
-<tr id="row_234_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1meta__schedule_1_1WorkloadEqual.html" target="_self">tvm::meta_schedule::WorkloadEqual</a></td><td class="desc">The equality check for <a class="el" href="classtvm_1_1meta__schedule_1_1Workload.html" title="Managed reference to WorkloadNode. ">Workload</a> </td></tr>
-<tr id="row_235_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1meta__schedule_1_1WorkloadHash.html" target="_self">tvm::meta_schedule::WorkloadHash</a></td><td class="desc">The hash method for <a class="el" href="classtvm_1_1meta__schedule_1_1Workload.html" title="Managed reference to WorkloadNode. ">Workload</a> </td></tr>
-<tr id="row_236_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1WriteStream.html" target="_self">tvm::runtime::micro_rpc::WriteStream</a></td><td class="desc"></td></tr>
+<tr id="row_199_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; TargetJSON(TargetJSON)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_200_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; te::Schedule(const Attrs &amp;attrs, const Array&lt; te::Tensor &gt; &amp;outs, const Target &amp;target)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_201_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void()&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_202_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const Array&lt; MeasureCandidate &gt; &amp;, const Array&lt; RunnerResult &gt; &amp;)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_203_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const Array&lt; tir::Schedule &gt; &amp;, const Optional&lt; Database &gt; &amp;, const Optional&lt; CostModel &gt; &amp;)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_204_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const TaskScheduler &amp;task_scheduler, int task_id, const Array&lt; MeasureCandidate &gt; &amp;measure_candidates, const Array&lt; BuilderResult &gt; &amp;builds, const Array&lt; RunnerResult &gt; &amp;result [...]
+<tr id="row_205_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const TuneContext &amp;)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_206_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const TuneContext &amp;, const Array&lt; MeasureCandidate &gt; &amp;, const Array&lt; RunnerResult &gt; &amp;)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_207_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const TuneContext &amp;, const Array&lt; MeasureCandidate &gt; &amp;, void *p_addr)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_208_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(const TuningRecord &amp;)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_209_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(int)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_210_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(size_t, void *)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_211_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(String)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_212_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; void(tvm::DiagnosticContext ctx)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_213_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_self">tvm::runtime::TypedPackedFunc&lt; Workload(const IRModule &amp;)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_214_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor.html" target="_self">tvm::TypeFunctor&lt; FType &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_215_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html" target="_self">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_216_" class="even"><td class="entry"><span style="width:0px;display:inline-block;">&#160;</span><span id="arr_216_" class="arrow" onclick="toggleFolder('216_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor.html" target="_self">tvm::TypeFunctor&lt; Type(const Type &amp;n)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_216_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeMutator.html" target="_self">tvm::TypeMutator</a></td><td class="desc"><a class="el" href="classtvm_1_1TypeMutator.html" title="TypeMutator that mutates expressions. ">TypeMutator</a> that mutates expressions </td></tr>
+<tr id="row_217_"><td class="entry"><span style="width:0px;display:inline-block;">&#160;</span><span id="arr_217_" class="arrow" onclick="toggleFolder('217_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor.html" target="_self">tvm::TypeFunctor&lt; void(const Type &amp;n)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_217_0_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVisitor.html" target="_self">tvm::TypeVisitor</a></td><td class="desc">A type visitor that recursively visit types </td></tr>
+<tr id="row_218_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1TypeIndex.html" target="_self">tvm::runtime::TypeIndex</a></td><td class="desc">Namespace for the list of type index </td></tr>
+<tr id="row_219_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName.html" target="_self">tvm::detail::TypeName&lt; T &gt;</a></td><td class="desc">Helper struct to get the type name known to tvm </td></tr>
+<tr id="row_220_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01bool_01_4.html" target="_self">tvm::detail::TypeName&lt; bool &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_221_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01DataType_01_4.html" target="_self">tvm::detail::TypeName&lt; DataType &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_222_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01double_01_4.html" target="_self">tvm::detail::TypeName&lt; double &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_223_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01int_01_4.html" target="_self">tvm::detail::TypeName&lt; int &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_224_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01int64__t_01_4.html" target="_self">tvm::detail::TypeName&lt; int64_t &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_225_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01uint64__t_01_4.html" target="_self">tvm::detail::TypeName&lt; uint64_t &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_226_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01void_01_5_01_4.html" target="_self">tvm::detail::TypeName&lt; void * &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_227_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1Unframer.html" target="_self">tvm::runtime::micro_rpc::Unframer</a></td><td class="desc"></td></tr>
+<tr id="row_228_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1v__info.html" target="_self">tvm::relay::v_info</a></td><td class="desc">A struct to keep info of traversed expr in ExpandDataflow function </td></tr>
+<tr id="row_229_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1Array_1_1ValueConverter.html" target="_self">tvm::runtime::Array&lt; T, typename &gt;::ValueConverter</a></td><td class="desc"></td></tr>
+<tr id="row_230_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1detail_1_1ValueTypeInfoMaker.html" target="_self">tvm::detail::ValueTypeInfoMaker&lt; ValueType, IsArray, IsMap &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_231_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDeviceCache.html" target="_self">tvm::VirtualDeviceCache</a></td><td class="desc">A cache of <code>VirtualDevices</code>. This can be used: </td></tr>
+<tr id="row_232_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFrame.html" target="_self">tvm::runtime::vm::VMFrame</a></td><td class="desc">A representation of a stack frame </td></tr>
+<tr id="row_233_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFunction.html" target="_self">tvm::runtime::vm::VMFunction</a></td><td class="desc">A representation of a Relay function in the VM </td></tr>
+<tr id="row_234_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1With.html" target="_self">tvm::With&lt; ContextType &gt;</a></td><td class="desc">RAII wrapper function to enter and exit a context object similar to python's with syntax </td></tr>
+<tr id="row_235_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1meta__schedule_1_1WorkloadEqual.html" target="_self">tvm::meta_schedule::WorkloadEqual</a></td><td class="desc">The equality check for <a class="el" href="classtvm_1_1meta__schedule_1_1Workload.html" title="Managed reference to WorkloadNode. ">Workload</a> </td></tr>
+<tr id="row_236_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1meta__schedule_1_1WorkloadHash.html" target="_self">tvm::meta_schedule::WorkloadHash</a></td><td class="desc">The hash method for <a class="el" href="classtvm_1_1meta__schedule_1_1Workload.html" title="Managed reference to WorkloadNode. ">Workload</a> </td></tr>
+<tr id="row_237_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1WriteStream.html" target="_self">tvm::runtime::micro_rpc::WriteStream</a></td><td class="desc"></td></tr>
 </table>
 </div><!-- directory -->
 </div><!-- contents -->
diff --git a/docs/reference/api/doxygen/inherit_graph_10.svg b/docs/reference/api/doxygen/inherit_graph_10.svg
index 7c878468a..c5115f385 100644
--- a/docs/reference/api/doxygen/inherit_graph_10.svg
+++ b/docs/reference/api/doxygen/inherit_graph_10.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 62)">
 <title>Graphical Class Hierarchy</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-62 186,-62 186,4 -4,4"/>
-<!-- Node1262 -->
+<!-- Node1263 -->
 <g id="node1" class="node">
-<title>Node1262</title>
+<title>Node1263</title>
 <polygon fill="#ffffff" stroke="#bfbfbf" points="0,-19.5 0,-38.5 40,-38.5 40,-19.5 0,-19.5"/>
 <text text-anchor="middle" x="20" y="-26.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Error</text>
 </g>
@@ -24,24 +24,24 @@
 </a>
 </g>
 </g>
-<!-- Node1262&#45;&gt;Node0 -->
+<!-- Node1263&#45;&gt;Node0 -->
 <g id="edge1" class="edge">
-<title>Node1262&#45;&gt;Node0</title>
+<title>Node1263&#45;&gt;Node0</title>
 <path fill="none" stroke="#191970" d="M50.1726,-34.2594C61.6171,-36.2544 74.8623,-38.5631 87.1902,-40.712"/>
 <polygon fill="#191970" stroke="#191970" points="50.6991,-30.7985 40.2466,-32.5292 49.497,-37.6945 50.6991,-30.7985"/>
 </g>
-<!-- Node1264 -->
+<!-- Node1265 -->
 <g id="node3" class="node">
-<title>Node1264</title>
+<title>Node1265</title>
 <g id="a_node3"><a xlink:href="classtvm_1_1CompileError.html" target="_top" xlink:title="Custom Error class to be thrown during compilation. ">
 <polygon fill="#ffffff" stroke="#000000" points="76,-.5 76,-19.5 182,-19.5 182,-.5 76,-.5"/>
 <text text-anchor="middle" x="129" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::CompileError</text>
 </a>
 </g>
 </g>
-<!-- Node1262&#45;&gt;Node1264 -->
+<!-- Node1263&#45;&gt;Node1265 -->
 <g id="edge2" class="edge">
-<title>Node1262&#45;&gt;Node1264</title>
+<title>Node1263&#45;&gt;Node1265</title>
 <path fill="none" stroke="#191970" d="M50.1333,-23.7474C58.0955,-22.3595 66.9315,-20.8193 75.7249,-19.2865"/>
 <polygon fill="#191970" stroke="#191970" points="49.497,-20.3055 40.2466,-25.4708 50.6991,-27.2015 49.497,-20.3055"/>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_108.svg b/docs/reference/api/doxygen/inherit_graph_108.svg
index c190cac7f..c5e0a57b1 100644
--- a/docs/reference/api/doxygen/inherit_graph_108.svg
+++ b/docs/reference/api/doxygen/inherit_graph_108.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 11728)">
 <title>Graphical Class Hierarchy</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-11728 1069,-11728 1069,4 -4,4"/>
-<!-- Node1272 -->
+<!-- Node1273 -->
 <g id="node1" class="node">
-<title>Node1272</title>
+<title>Node1273</title>
 <g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html" target="_top" xlink:title="The container base structure contains all the fields except for the Object header. ">
 <polygon fill="#ffffff" stroke="#000000" points="20,-10273 20,-10303 148,-10303 148,-10273 20,-10273"/>
 <text text-anchor="start" x="28" y="-10291" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::NDArray</text>
@@ -29,15 +29,15 @@
 </a>
 </g>
 </g>
-<!-- Node1272&#45;&gt;Node521 -->
+<!-- Node1273&#45;&gt;Node521 -->
 <g id="edge1" class="edge">
-<title>Node1272&#45;&gt;Node521</title>
+<title>Node1273&#45;&gt;Node521</title>
 <path fill="none" stroke="#191970" d="M158.2796,-10287.3278C184.9735,-10287.0862 214.8383,-10286.8159 240.6206,-10286.5826"/>
 <polygon fill="#191970" stroke="#191970" points="158.1871,-10283.8284 148.2192,-10287.4188 158.2505,-10290.8281 158.1871,-10283.8284"/>
 </g>
-<!-- Node1219 -->
+<!-- Node1220 -->
 <g id="node3" class="node">
-<title>Node1219</title>
+<title>Node1220</title>
 <g id="a_node3"><a xlink:href="classtvm_1_1runtime_1_1InplaceArrayBase.html" target="_top" xlink:title="Base template for classes with array like memory layout. ">
 <polygon fill="#ffffff" stroke="#000000" points="222.5,-2592 222.5,-2622 387.5,-2622 387.5,-2592 222.5,-2592"/>
 <text text-anchor="start" x="230.5" y="-2610" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::InplaceArray</text>
@@ -54,15 +54,15 @@
 </a>
 </g>
 </g>
-<!-- Node1219&#45;&gt;Node512 -->
+<!-- Node1220&#45;&gt;Node512 -->
 <g id="edge2" class="edge">
-<title>Node1219&#45;&gt;Node512</title>
+<title>Node1220&#45;&gt;Node512</title>
 <path fill="none" stroke="#191970" d="M396.2793,-2591.0092C399.8373,-2588.3986 403.1114,-2585.4124 406,-2582 473.3926,-2502.387 372.631,-2191.897 442,-2114 446.817,-2108.5908 452.6778,-2104.4451 459.1102,-2101.2939"/>
 <polygon fill="#191970" stroke="#191970" points="394.1669,-2588.2006 387.5096,-2596.4426 397.8536,-2594.1511 394.1669,-2588.2006"/>
 </g>
-<!-- Node1218 -->
+<!-- Node1219 -->
 <g id="node5" class="node">
-<title>Node1218</title>
+<title>Node1219</title>
 <g id="a_node5"><a xlink:href="classtvm_1_1runtime_1_1InplaceArrayBase.html" target="_top" xlink:title="tvm::runtime::InplaceArray\lBase\&lt; ADTObj, ObjectRef \&gt;">
 <polygon fill="#ffffff" stroke="#000000" points="7.5,-10224 7.5,-10254 160.5,-10254 160.5,-10224 7.5,-10224"/>
 <text text-anchor="start" x="15.5" y="-10242" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::InplaceArray</text>
@@ -79,15 +79,15 @@
 </a>
 </g>
 </g>
-<!-- Node1218&#45;&gt;Node505 -->
+<!-- Node1219&#45;&gt;Node505 -->
 <g id="edge3" class="edge">
-<title>Node1218&#45;&gt;Node505</title>
+<title>Node1219&#45;&gt;Node505</title>
 <path fill="none" stroke="#191970" d="M170.9456,-10239C195.1707,-10239 220.9797,-10239 243.4174,-10239"/>
 <polygon fill="#191970" stroke="#191970" points="170.6749,-10235.5001 160.6748,-10239 170.6748,-10242.5001 170.6749,-10235.5001"/>
 </g>
-<!-- Node1217 -->
+<!-- Node1218 -->
 <g id="node7" class="node">
-<title>Node1217</title>
+<title>Node1218</title>
 <g id="a_node7"><a xlink:href="classtvm_1_1runtime_1_1InplaceArrayBase.html" target="_top" xlink:title="tvm::runtime::InplaceArray\lBase\&lt; ArrayNode, ObjectRef \&gt;">
 <polygon fill="#ffffff" stroke="#000000" points="0,-10175 0,-10205 168,-10205 168,-10175 0,-10175"/>
 <text text-anchor="start" x="8" y="-10193" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::InplaceArray</text>
@@ -104,9 +104,9 @@
 </a>
 </g>
 </g>
-<!-- Node1217&#45;&gt;Node506 -->
+<!-- Node1218&#45;&gt;Node506 -->
 <g id="edge4" class="edge">
-<title>Node1217&#45;&gt;Node506</title>
+<title>Node1218&#45;&gt;Node506</title>
 <path fill="none" stroke="#191970" d="M178.2093,-10194.6892C197.5291,-10195.6508 217.5282,-10196.6462 235.774,-10197.5544"/>
 <polygon fill="#191970" stroke="#191970" points="178.3427,-10191.1916 168.1811,-10194.19 177.9947,-10198.1829 178.3427,-10191.1916"/>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_163.svg b/docs/reference/api/doxygen/inherit_graph_163.svg
index c7b9a8a95..be11262b9 100644
--- a/docs/reference/api/doxygen/inherit_graph_163.svg
+++ b/docs/reference/api/doxygen/inherit_graph_163.svg
@@ -4,21 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="158pt" height="72pt"
- viewBox="0.00 0.00 158.00 72.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 68)">
+<svg width="181pt" height="39pt"
+ viewBox="0.00 0.00 181.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-68 154,-68 154,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 177,-35 177,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; te::Schedule(const\l Attrs &amp;attrs, const Array\l\&lt; te::Tensor \&gt; &amp;outs, const\l Target &amp;target)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-63.5 150,-63.5 150,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; te::Schedule(const</text>
-<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> Attrs &amp;attrs, const Array</text>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; te::Tensor &gt; &amp;outs, const</text>
-<text text-anchor="middle" x="75" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> Target &amp;target)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; TargetJSON(TargetJSON)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 173,-30.5 173,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
+<text text-anchor="middle" x="86.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; TargetJSON(TargetJSON)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_164.svg b/docs/reference/api/doxygen/inherit_graph_164.svg
index 459441f8f..c7b9a8a95 100644
--- a/docs/reference/api/doxygen/inherit_graph_164.svg
+++ b/docs/reference/api/doxygen/inherit_graph_164.svg
@@ -4,18 +4,21 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="157pt" height="39pt"
- viewBox="0.00 0.00 157.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="158pt" height="72pt"
+ viewBox="0.00 0.00 158.00 72.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 68)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 153,-35 153,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-68 154,-68 154,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void()\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 149,-30.5 149,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="middle" x="74.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void()&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; te::Schedule(const\l Attrs &amp;attrs, const Array\l\&lt; te::Tensor \&gt; &amp;outs, const\l Target &amp;target)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-63.5 150,-63.5 150,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
+<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; te::Schedule(const</text>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> Attrs &amp;attrs, const Array</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; te::Tensor &gt; &amp;outs, const</text>
+<text text-anchor="middle" x="75" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> Target &amp;target)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_165.svg b/docs/reference/api/doxygen/inherit_graph_165.svg
index 34a550999..459441f8f 100644
--- a/docs/reference/api/doxygen/inherit_graph_165.svg
+++ b/docs/reference/api/doxygen/inherit_graph_165.svg
@@ -4,20 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="192pt" height="61pt"
- viewBox="0.00 0.00 192.00 61.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 57)">
+<svg width="157pt" height="39pt"
+ viewBox="0.00 0.00 157.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-57 188,-57 188,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 153,-35 153,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const Array\l\&lt; MeasureCandidate \&gt; &amp;,\l const Array\&lt; RunnerResult \&gt; &amp;)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-52.5 184,-52.5 184,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const Array</text>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; MeasureCandidate &gt; &amp;,</text>
-<text text-anchor="middle" x="92" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> const Array&lt; RunnerResult &gt; &amp;)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void()\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 149,-30.5 149,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
+<text text-anchor="middle" x="74.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void()&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_166.svg b/docs/reference/api/doxygen/inherit_graph_166.svg
index ce96f228c..34a550999 100644
--- a/docs/reference/api/doxygen/inherit_graph_166.svg
+++ b/docs/reference/api/doxygen/inherit_graph_166.svg
@@ -4,21 +4,20 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="181pt" height="72pt"
- viewBox="0.00 0.00 181.00 72.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 68)">
+<svg width="192pt" height="61pt"
+ viewBox="0.00 0.00 192.00 61.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 57)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-68 177,-68 177,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-57 188,-57 188,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const Array\l\&lt; tir::Schedule \&gt; &amp;, const\l Optional\&lt; Database \&gt; &amp;, const\l Optional\&lt; CostModel \&gt; &amp;)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-63.5 173,-63.5 173,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const Array</text>
-<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tir::Schedule &gt; &amp;, const</text>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> Optional&lt; Database &gt; &amp;, const</text>
-<text text-anchor="middle" x="86.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> Optional&lt; CostModel &gt; &amp;)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const Array\l\&lt; MeasureCandidate \&gt; &amp;,\l const Array\&lt; RunnerResult \&gt; &amp;)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-52.5 184,-52.5 184,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const Array</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; MeasureCandidate &gt; &amp;,</text>
+<text text-anchor="middle" x="92" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> const Array&lt; RunnerResult &gt; &amp;)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_167.svg b/docs/reference/api/doxygen/inherit_graph_167.svg
index 0895051dd..ce96f228c 100644
--- a/docs/reference/api/doxygen/inherit_graph_167.svg
+++ b/docs/reference/api/doxygen/inherit_graph_167.svg
@@ -4,23 +4,21 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="213pt" height="94pt"
- viewBox="0.00 0.00 213.00 94.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 90)">
+<svg width="181pt" height="72pt"
+ viewBox="0.00 0.00 181.00 72.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 68)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-90 209,-90 209,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-68 177,-68 177,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const TaskScheduler\l &amp;task_scheduler, int task_id,\l const Array\&lt; MeasureCandidate\l \&gt; &amp;measure_candidates, const Array\l\&lt; BuilderResult \&gt; &amp;builds, const Array\l\&lt; RunnerResult \&gt; &amp;results)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-85.5 205,-85.5 205,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-73.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="start" x="8" y="-62.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const TaskScheduler</text>
-<text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &amp;task_scheduler, int task_id,</text>
-<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> const Array&lt; MeasureCandidate</text>
-<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &gt; &amp;measure_candidates, const Array</text>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; BuilderResult &gt; &amp;builds, const Array</text>
-<text text-anchor="middle" x="102.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; RunnerResult &gt; &amp;results)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const Array\l\&lt; tir::Schedule \&gt; &amp;, const\l Optional\&lt; Database \&gt; &amp;, const\l Optional\&lt; CostModel \&gt; &amp;)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-63.5 173,-63.5 173,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
+<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const Array</text>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tir::Schedule &gt; &amp;, const</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> Optional&lt; Database &gt; &amp;, const</text>
+<text text-anchor="middle" x="86.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> Optional&lt; CostModel &gt; &amp;)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_168.svg b/docs/reference/api/doxygen/inherit_graph_168.svg
index 9495e35e0..0895051dd 100644
--- a/docs/reference/api/doxygen/inherit_graph_168.svg
+++ b/docs/reference/api/doxygen/inherit_graph_168.svg
@@ -4,18 +4,23 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="195pt" height="39pt"
- viewBox="0.00 0.00 195.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="213pt" height="94pt"
+ viewBox="0.00 0.00 213.00 94.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 90)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 191,-35 191,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-90 209,-90 209,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const TuneContext &amp;)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 187,-30.5 187,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="middle" x="93.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const TuneContext &amp;)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const TaskScheduler\l &amp;task_scheduler, int task_id,\l const Array\&lt; MeasureCandidate\l \&gt; &amp;measure_candidates, const Array\l\&lt; BuilderResult \&gt; &amp;builds, const Array\l\&lt; RunnerResult \&gt; &amp;results)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-85.5 205,-85.5 205,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-73.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
+<text text-anchor="start" x="8" y="-62.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const TaskScheduler</text>
+<text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &amp;task_scheduler, int task_id,</text>
+<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> const Array&lt; MeasureCandidate</text>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &gt; &amp;measure_candidates, const Array</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; BuilderResult &gt; &amp;builds, const Array</text>
+<text text-anchor="middle" x="102.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; RunnerResult &gt; &amp;results)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_169.svg b/docs/reference/api/doxygen/inherit_graph_169.svg
index bbfcd17b8..9495e35e0 100644
--- a/docs/reference/api/doxygen/inherit_graph_169.svg
+++ b/docs/reference/api/doxygen/inherit_graph_169.svg
@@ -4,20 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="217pt" height="61pt"
- viewBox="0.00 0.00 217.00 61.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 57)">
+<svg width="195pt" height="39pt"
+ viewBox="0.00 0.00 195.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-57 213,-57 213,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 191,-35 191,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const TuneContext\l &amp;, const Array\&lt; MeasureCandidate\l \&gt; &amp;, const Array\&lt; RunnerResult \&gt; &amp;)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-52.5 209,-52.5 209,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const TuneContext</text>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &amp;, const Array&lt; MeasureCandidate</text>
-<text text-anchor="middle" x="104.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &gt; &amp;, const Array&lt; RunnerResult &gt; &amp;)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const TuneContext &amp;)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 187,-30.5 187,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
+<text text-anchor="middle" x="93.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const TuneContext &amp;)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_170.svg b/docs/reference/api/doxygen/inherit_graph_170.svg
index a27dfa822..bbfcd17b8 100644
--- a/docs/reference/api/doxygen/inherit_graph_170.svg
+++ b/docs/reference/api/doxygen/inherit_graph_170.svg
@@ -4,20 +4,20 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="199pt" height="61pt"
- viewBox="0.00 0.00 199.00 61.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="217pt" height="61pt"
+ viewBox="0.00 0.00 217.00 61.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 57)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-57 195,-57 195,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-57 213,-57 213,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const TuneContext\l &amp;, const Array\&lt; MeasureCandidate\l \&gt; &amp;, void *p_addr)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-52.5 191,-52.5 191,-.5 0,-.5"/>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const TuneContext\l &amp;, const Array\&lt; MeasureCandidate\l \&gt; &amp;, const Array\&lt; RunnerResult \&gt; &amp;)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-52.5 209,-52.5 209,-.5 0,-.5"/>
 <text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
 <text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const TuneContext</text>
 <text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &amp;, const Array&lt; MeasureCandidate</text>
-<text text-anchor="middle" x="95.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &gt; &amp;, void *p_addr)&gt;</text>
+<text text-anchor="middle" x="104.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &gt; &amp;, const Array&lt; RunnerResult &gt; &amp;)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_171.svg b/docs/reference/api/doxygen/inherit_graph_171.svg
index ef937df40..a27dfa822 100644
--- a/docs/reference/api/doxygen/inherit_graph_171.svg
+++ b/docs/reference/api/doxygen/inherit_graph_171.svg
@@ -4,18 +4,20 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="199pt" height="39pt"
- viewBox="0.00 0.00 199.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="199pt" height="61pt"
+ viewBox="0.00 0.00 199.00 61.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 57)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 195,-35 195,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-57 195,-57 195,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const TuningRecord &amp;)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 191,-30.5 191,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="middle" x="95.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const TuningRecord &amp;)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const TuneContext\l &amp;, const Array\&lt; MeasureCandidate\l \&gt; &amp;, void *p_addr)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-52.5 191,-52.5 191,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const TuneContext</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &amp;, const Array&lt; MeasureCandidate</text>
+<text text-anchor="middle" x="95.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &gt; &amp;, void *p_addr)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_172.svg b/docs/reference/api/doxygen/inherit_graph_172.svg
index 69cf6cc4e..ef937df40 100644
--- a/docs/reference/api/doxygen/inherit_graph_172.svg
+++ b/docs/reference/api/doxygen/inherit_graph_172.svg
@@ -4,18 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="157pt" height="39pt"
- viewBox="0.00 0.00 157.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="199pt" height="39pt"
+ viewBox="0.00 0.00 199.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 153,-35 153,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 195,-35 195,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(int)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 149,-30.5 149,-.5 0,-.5"/>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(const TuningRecord &amp;)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 191,-30.5 191,-.5 0,-.5"/>
 <text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="middle" x="74.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(int)&gt;</text>
+<text text-anchor="middle" x="95.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(const TuningRecord &amp;)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_173.svg b/docs/reference/api/doxygen/inherit_graph_173.svg
index 67c530207..69cf6cc4e 100644
--- a/docs/reference/api/doxygen/inherit_graph_173.svg
+++ b/docs/reference/api/doxygen/inherit_graph_173.svg
@@ -4,18 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="158pt" height="39pt"
- viewBox="0.00 0.00 158.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="157pt" height="39pt"
+ viewBox="0.00 0.00 157.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 154,-35 154,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 153,-35 153,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(size_t, void *)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 150,-30.5 150,-.5 0,-.5"/>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(int)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 149,-30.5 149,-.5 0,-.5"/>
 <text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="middle" x="75" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(size_t, void *)&gt;</text>
+<text text-anchor="middle" x="74.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(int)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_174.svg b/docs/reference/api/doxygen/inherit_graph_174.svg
index c064ec3ca..67c530207 100644
--- a/docs/reference/api/doxygen/inherit_graph_174.svg
+++ b/docs/reference/api/doxygen/inherit_graph_174.svg
@@ -4,18 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="157pt" height="39pt"
- viewBox="0.00 0.00 157.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="158pt" height="39pt"
+ viewBox="0.00 0.00 158.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 153,-35 153,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 154,-35 154,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(String)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 149,-30.5 149,-.5 0,-.5"/>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(size_t, void *)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 150,-30.5 150,-.5 0,-.5"/>
 <text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="middle" x="74.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(String)&gt;</text>
+<text text-anchor="middle" x="75" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(size_t, void *)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_175.svg b/docs/reference/api/doxygen/inherit_graph_175.svg
index 433d91f4c..c064ec3ca 100644
--- a/docs/reference/api/doxygen/inherit_graph_175.svg
+++ b/docs/reference/api/doxygen/inherit_graph_175.svg
@@ -4,18 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="228pt" height="39pt"
- viewBox="0.00 0.00 228.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="157pt" height="39pt"
+ viewBox="0.00 0.00 157.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 224,-35 224,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 153,-35 153,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(tvm::DiagnosticContext ctx)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 220,-30.5 220,-.5 0,-.5"/>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(String)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 149,-30.5 149,-.5 0,-.5"/>
 <text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="middle" x="110" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(tvm::DiagnosticContext ctx)&gt;</text>
+<text text-anchor="middle" x="74.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(String)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_176.svg b/docs/reference/api/doxygen/inherit_graph_176.svg
index e156b80d3..433d91f4c 100644
--- a/docs/reference/api/doxygen/inherit_graph_176.svg
+++ b/docs/reference/api/doxygen/inherit_graph_176.svg
@@ -4,18 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="204pt" height="39pt"
- viewBox="0.00 0.00 204.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="228pt" height="39pt"
+ viewBox="0.00 0.00 228.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 200,-35 200,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 224,-35 224,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; Workload(const IRModule &amp;)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 196,-30.5 196,-.5 0,-.5"/>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; void(tvm::DiagnosticContext ctx)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 220,-30.5 220,-.5 0,-.5"/>
 <text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
-<text text-anchor="middle" x="98" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; Workload(const IRModule &amp;)&gt;</text>
+<text text-anchor="middle" x="110" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; void(tvm::DiagnosticContext ctx)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_177.svg b/docs/reference/api/doxygen/inherit_graph_177.svg
index 55f7b8f15..e156b80d3 100644
--- a/docs/reference/api/doxygen/inherit_graph_177.svg
+++ b/docs/reference/api/doxygen/inherit_graph_177.svg
@@ -4,17 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="144pt" height="28pt"
- viewBox="0.00 0.00 144.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
+<svg width="204pt" height="39pt"
+ viewBox="0.00 0.00 204.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 140,-24 140,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 200,-35 200,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structtvm_1_1runtime_1_1TypeIndex.html" target="_top" xlink:title="Namespace for the list of type index. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 136,-19.5 136,-.5 0,-.5"/>
-<text text-anchor="middle" x="68" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypeIndex</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1TypedPackedFunc.html" target="_top" xlink:title="tvm::runtime::TypedPacked\lFunc\&lt; Workload(const IRModule &amp;)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 196,-30.5 196,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypedPacked</text>
+<text text-anchor="middle" x="98" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Func&lt; Workload(const IRModule &amp;)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_178.svg b/docs/reference/api/doxygen/inherit_graph_178.svg
index d2ddf678c..55f7b8f15 100644
--- a/docs/reference/api/doxygen/inherit_graph_178.svg
+++ b/docs/reference/api/doxygen/inherit_graph_178.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="160pt" height="28pt"
- viewBox="0.00 0.00 160.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="144pt" height="28pt"
+ viewBox="0.00 0.00 144.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 156,-24 156,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 140,-24 140,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1vm_1_1Allocator.html" target="_top" xlink:title="tvm::runtime::vm::Allocator">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 152,-19.5 152,-.5 0,-.5"/>
-<text text-anchor="middle" x="76" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::Allocator</text>
+<g id="a_node1"><a xlink:href="structtvm_1_1runtime_1_1TypeIndex.html" target="_top" xlink:title="Namespace for the list of type index. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 136,-19.5 136,-.5 0,-.5"/>
+<text text-anchor="middle" x="68" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::TypeIndex</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_179.svg b/docs/reference/api/doxygen/inherit_graph_179.svg
index e6c532cc9..d2ddf678c 100644
--- a/docs/reference/api/doxygen/inherit_graph_179.svg
+++ b/docs/reference/api/doxygen/inherit_graph_179.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="145pt" height="28pt"
- viewBox="0.00 0.00 145.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="160pt" height="28pt"
+ viewBox="0.00 0.00 160.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 141,-24 141,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 156,-24 156,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structtvm_1_1runtime_1_1vm_1_1Buffer.html" target="_top" xlink:title="tvm::runtime::vm::Buffer">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 137,-19.5 137,-.5 0,-.5"/>
-<text text-anchor="middle" x="68.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::Buffer</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1vm_1_1Allocator.html" target="_top" xlink:title="tvm::runtime::vm::Allocator">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 152,-19.5 152,-.5 0,-.5"/>
+<text text-anchor="middle" x="76" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::Allocator</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_180.svg b/docs/reference/api/doxygen/inherit_graph_180.svg
index 3aa085fac..e6c532cc9 100644
--- a/docs/reference/api/doxygen/inherit_graph_180.svg
+++ b/docs/reference/api/doxygen/inherit_graph_180.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="168pt" height="28pt"
- viewBox="0.00 0.00 168.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="145pt" height="28pt"
+ viewBox="0.00 0.00 145.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 164,-24 164,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 141,-24 141,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structtvm_1_1runtime_1_1vm_1_1Instruction.html" target="_top" xlink:title="A single virtual machine instruction. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 160,-19.5 160,-.5 0,-.5"/>
-<text text-anchor="middle" x="80" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::Instruction</text>
+<g id="a_node1"><a xlink:href="structtvm_1_1runtime_1_1vm_1_1Buffer.html" target="_top" xlink:title="tvm::runtime::vm::Buffer">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 137,-19.5 137,-.5 0,-.5"/>
+<text text-anchor="middle" x="68.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::Buffer</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_181.svg b/docs/reference/api/doxygen/inherit_graph_181.svg
index 48c039fcd..3aa085fac 100644
--- a/docs/reference/api/doxygen/inherit_graph_181.svg
+++ b/docs/reference/api/doxygen/inherit_graph_181.svg
@@ -4,18 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="156pt" height="39pt"
- viewBox="0.00 0.00 156.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="168pt" height="28pt"
+ viewBox="0.00 0.00 168.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 152,-35 152,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 164,-24 164,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1vm_1_1MemoryManager.html" target="_top" xlink:title="tvm::runtime::vm::Memory\lManager">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 148,-30.5 148,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::Memory</text>
-<text text-anchor="middle" x="74" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Manager</text>
+<g id="a_node1"><a xlink:href="structtvm_1_1runtime_1_1vm_1_1Instruction.html" target="_top" xlink:title="A single virtual machine instruction. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 160,-19.5 160,-.5 0,-.5"/>
+<text text-anchor="middle" x="80" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::Instruction</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_182.svg b/docs/reference/api/doxygen/inherit_graph_182.svg
index c7e29becb..48c039fcd 100644
--- a/docs/reference/api/doxygen/inherit_graph_182.svg
+++ b/docs/reference/api/doxygen/inherit_graph_182.svg
@@ -4,17 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="161pt" height="28pt"
- viewBox="0.00 0.00 161.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
+<svg width="156pt" height="39pt"
+ viewBox="0.00 0.00 156.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 157,-24 157,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 152,-35 152,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structtvm_1_1runtime_1_1vm_1_1VMFrame.html" target="_top" xlink:title="A representation of a stack frame. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 153,-19.5 153,-.5 0,-.5"/>
-<text text-anchor="middle" x="76.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::VMFrame</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1vm_1_1MemoryManager.html" target="_top" xlink:title="tvm::runtime::vm::Memory\lManager">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 148,-30.5 148,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::Memory</text>
+<text text-anchor="middle" x="74" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Manager</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_183.svg b/docs/reference/api/doxygen/inherit_graph_183.svg
index 04b880c91..c7e29becb 100644
--- a/docs/reference/api/doxygen/inherit_graph_183.svg
+++ b/docs/reference/api/doxygen/inherit_graph_183.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="172pt" height="28pt"
- viewBox="0.00 0.00 172.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="161pt" height="28pt"
+ viewBox="0.00 0.00 161.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 168,-24 168,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 157,-24 157,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structtvm_1_1runtime_1_1vm_1_1VMFunction.html" target="_top" xlink:title="A representation of a Relay function in the VM. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 164,-19.5 164,-.5 0,-.5"/>
-<text text-anchor="middle" x="82" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::VMFunction</text>
+<g id="a_node1"><a xlink:href="structtvm_1_1runtime_1_1vm_1_1VMFrame.html" target="_top" xlink:title="A representation of a stack frame. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 153,-19.5 153,-.5 0,-.5"/>
+<text text-anchor="middle" x="76.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::VMFrame</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_184.svg b/docs/reference/api/doxygen/inherit_graph_184.svg
index 35a5d586d..04b880c91 100644
--- a/docs/reference/api/doxygen/inherit_graph_184.svg
+++ b/docs/reference/api/doxygen/inherit_graph_184.svg
@@ -4,18 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="122pt" height="39pt"
- viewBox="0.00 0.00 122.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="172pt" height="28pt"
+ viewBox="0.00 0.00 172.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 118,-35 118,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 168,-24 168,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1SEqualReducer_1_1Handler.html" target="_top" xlink:title="Internal handler that defines custom behaviors.. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 114,-30.5 114,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::SEqualReducer</text>
-<text text-anchor="middle" x="57" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::Handler</text>
+<g id="a_node1"><a xlink:href="structtvm_1_1runtime_1_1vm_1_1VMFunction.html" target="_top" xlink:title="A representation of a Relay function in the VM. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 164,-19.5 164,-.5 0,-.5"/>
+<text text-anchor="middle" x="82" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::vm::VMFunction</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_185.svg b/docs/reference/api/doxygen/inherit_graph_185.svg
index 5531358c0..35a5d586d 100644
--- a/docs/reference/api/doxygen/inherit_graph_185.svg
+++ b/docs/reference/api/doxygen/inherit_graph_185.svg
@@ -4,17 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="120pt" height="28pt"
- viewBox="0.00 0.00 120.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
+<svg width="122pt" height="39pt"
+ viewBox="0.00 0.00 122.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 116,-24 116,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 118,-35 118,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1SHashReducer.html" target="_top" xlink:title="A Reducer class to reduce the structural hash value. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 112,-19.5 112,-.5 0,-.5"/>
-<text text-anchor="middle" x="56" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::SHashReducer</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1SEqualReducer_1_1Handler.html" target="_top" xlink:title="Internal handler that defines custom behaviors.. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 114,-30.5 114,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::SEqualReducer</text>
+<text text-anchor="middle" x="57" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::Handler</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_186.svg b/docs/reference/api/doxygen/inherit_graph_186.svg
index ba4b17214..5531358c0 100644
--- a/docs/reference/api/doxygen/inherit_graph_186.svg
+++ b/docs/reference/api/doxygen/inherit_graph_186.svg
@@ -4,18 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="126pt" height="39pt"
- viewBox="0.00 0.00 126.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="120pt" height="28pt"
+ viewBox="0.00 0.00 120.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 122,-35 122,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 116,-24 116,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1SHashReducer_1_1Handler.html" target="_top" xlink:title="Internal handler that defines custom behaviors. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 118,-30.5 118,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::SHashReducer::</text>
-<text text-anchor="middle" x="59" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Handler</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1SHashReducer.html" target="_top" xlink:title="A Reducer class to reduce the structural hash value. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 112,-19.5 112,-.5 0,-.5"/>
+<text text-anchor="middle" x="56" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::SHashReducer</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_187.svg b/docs/reference/api/doxygen/inherit_graph_187.svg
index dba84fc0a..ba4b17214 100644
--- a/docs/reference/api/doxygen/inherit_graph_187.svg
+++ b/docs/reference/api/doxygen/inherit_graph_187.svg
@@ -4,18 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="185pt" height="39pt"
- viewBox="0.00 0.00 185.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="126pt" height="39pt"
+ viewBox="0.00 0.00 126.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 181,-35 181,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 122,-35 122,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1support_1_1LinearCongruentialEngine.html" target="_top" xlink:title="This linear congruential engine is a drop&#45;in replacement for std::minstd_rand. It strictly correspond...">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 177,-30.5 177,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::support::LinearCongruential</text>
-<text text-anchor="middle" x="88.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Engine</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1SHashReducer_1_1Handler.html" target="_top" xlink:title="Internal handler that defines custom behaviors. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 118,-30.5 118,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::SHashReducer::</text>
+<text text-anchor="middle" x="59" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Handler</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_188.svg b/docs/reference/api/doxygen/inherit_graph_188.svg
index 62749d106..dba84fc0a 100644
--- a/docs/reference/api/doxygen/inherit_graph_188.svg
+++ b/docs/reference/api/doxygen/inherit_graph_188.svg
@@ -4,18 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="117pt" height="39pt"
- viewBox="0.00 0.00 117.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="185pt" height="39pt"
+ viewBox="0.00 0.00 185.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 113,-35 113,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 181,-35 181,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1support_1_1Span.html" target="_top" xlink:title="A partial implementation of the C++20 std::span. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 109,-30.5 109,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::support::Span</text>
-<text text-anchor="middle" x="54.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; T, W &gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1support_1_1LinearCongruentialEngine.html" target="_top" xlink:title="This linear congruential engine is a drop&#45;in replacement for std::minstd_rand. It strictly correspond...">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 177,-30.5 177,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::support::LinearCongruential</text>
+<text text-anchor="middle" x="88.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Engine</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_189.svg b/docs/reference/api/doxygen/inherit_graph_189.svg
index a9db5ab08..62749d106 100644
--- a/docs/reference/api/doxygen/inherit_graph_189.svg
+++ b/docs/reference/api/doxygen/inherit_graph_189.svg
@@ -4,32 +4,20 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="267pt" height="39pt"
- viewBox="0.00 0.00 267.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="117pt" height="39pt"
+ viewBox="0.00 0.00 117.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 263,-35 263,4 -4,4"/>
-<!-- Node1208 -->
-<g id="node1" class="node">
-<title>Node1208</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="0,-6 0,-25 53,-25 53,-6 0,-6"/>
-<text text-anchor="middle" x="26.5" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">iterator</text>
-</g>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 113,-35 113,4 -4,4"/>
 <!-- Node0 -->
-<g id="node2" class="node">
+<g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node2"><a xlink:href="classtvm_1_1support_1_1Span_1_1iterator__base.html" target="_top" xlink:title="tvm::support::Span\l\&lt; T, W \&gt;::iterator_base\&lt; W1 \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="89,-.5 89,-30.5 259,-30.5 259,-.5 89,-.5"/>
-<text text-anchor="start" x="97" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::support::Span</text>
-<text text-anchor="middle" x="174" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; T, W &gt;::iterator_base&lt; W1 &gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1support_1_1Span.html" target="_top" xlink:title="A partial implementation of the C++20 std::span. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 109,-30.5 109,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::support::Span</text>
+<text text-anchor="middle" x="54.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; T, W &gt;</text>
 </a>
 </g>
 </g>
-<!-- Node1208&#45;&gt;Node0 -->
-<g id="edge1" class="edge">
-<title>Node1208&#45;&gt;Node0</title>
-<path fill="none" stroke="#191970" d="M63.4274,-15.5C71.2977,-15.5 79.9205,-15.5 88.7694,-15.5"/>
-<polygon fill="#191970" stroke="#191970" points="63.2112,-12.0001 53.2111,-15.5 63.2111,-19.0001 63.2112,-12.0001"/>
-</g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/inherit_graph_190.svg b/docs/reference/api/doxygen/inherit_graph_190.svg
index 6790ab432..94a7c8f2d 100644
--- a/docs/reference/api/doxygen/inherit_graph_190.svg
+++ b/docs/reference/api/doxygen/inherit_graph_190.svg
@@ -4,19 +4,32 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="145pt" height="28pt"
- viewBox="0.00 0.00 145.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
+<svg width="267pt" height="39pt"
+ viewBox="0.00 0.00 267.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 141,-24 141,4 -4,4"/>
-<!-- Node0 -->
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 263,-35 263,4 -4,4"/>
+<!-- Node1209 -->
 <g id="node1" class="node">
+<title>Node1209</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="0,-6 0,-25 53,-25 53,-6 0,-6"/>
+<text text-anchor="middle" x="26.5" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">iterator</text>
+</g>
+<!-- Node0 -->
+<g id="node2" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1TargetKindRegEntry.html" target="_top" xlink:title="Helper structure to register TargetKind. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 137,-19.5 137,-.5 0,-.5"/>
-<text text-anchor="middle" x="68.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TargetKindRegEntry</text>
+<g id="a_node2"><a xlink:href="classtvm_1_1support_1_1Span_1_1iterator__base.html" target="_top" xlink:title="tvm::support::Span\l\&lt; T, W \&gt;::iterator_base\&lt; W1 \&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="89,-.5 89,-30.5 259,-30.5 259,-.5 89,-.5"/>
+<text text-anchor="start" x="97" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::support::Span</text>
+<text text-anchor="middle" x="174" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; T, W &gt;::iterator_base&lt; W1 &gt;</text>
 </a>
 </g>
 </g>
+<!-- Node1209&#45;&gt;Node0 -->
+<g id="edge1" class="edge">
+<title>Node1209&#45;&gt;Node0</title>
+<path fill="none" stroke="#191970" d="M63.4274,-15.5C71.2977,-15.5 79.9205,-15.5 88.7694,-15.5"/>
+<polygon fill="#191970" stroke="#191970" points="63.2112,-12.0001 53.2111,-15.5 63.2111,-19.0001 63.2112,-12.0001"/>
+</g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/inherit_graph_191.svg b/docs/reference/api/doxygen/inherit_graph_191.svg
index cca40743e..6790ab432 100644
--- a/docs/reference/api/doxygen/inherit_graph_191.svg
+++ b/docs/reference/api/doxygen/inherit_graph_191.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="140pt" height="28pt"
- viewBox="0.00 0.00 140.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="145pt" height="28pt"
+ viewBox="0.00 0.00 145.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 136,-24 136,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 141,-24 141,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1TargetTagRegEntry.html" target="_top" xlink:title="tvm::TargetTagRegEntry">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 132,-19.5 132,-.5 0,-.5"/>
-<text text-anchor="middle" x="66" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TargetTagRegEntry</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1TargetKindRegEntry.html" target="_top" xlink:title="Helper structure to register TargetKind. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 137,-19.5 137,-.5 0,-.5"/>
+<text text-anchor="middle" x="68.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TargetKindRegEntry</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_192.svg b/docs/reference/api/doxygen/inherit_graph_192.svg
index 296cfa66b..cca40743e 100644
--- a/docs/reference/api/doxygen/inherit_graph_192.svg
+++ b/docs/reference/api/doxygen/inherit_graph_192.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="127pt" height="28pt"
- viewBox="0.00 0.00 127.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="140pt" height="28pt"
+ viewBox="0.00 0.00 140.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 123,-24 123,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 136,-24 136,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1te_1_1Tensor_1_1Slice.html" target="_top" xlink:title="data structure to represent a slice that fixes first k coordinates. This is used to enable syntax sug...">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 119,-19.5 119,-.5 0,-.5"/>
-<text text-anchor="middle" x="59.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::te::Tensor::Slice</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1TargetTagRegEntry.html" target="_top" xlink:title="tvm::TargetTagRegEntry">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 132,-19.5 132,-.5 0,-.5"/>
+<text text-anchor="middle" x="66" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TargetTagRegEntry</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_193.svg b/docs/reference/api/doxygen/inherit_graph_193.svg
index 01ae6638a..296cfa66b 100644
--- a/docs/reference/api/doxygen/inherit_graph_193.svg
+++ b/docs/reference/api/doxygen/inherit_graph_193.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="121pt" height="28pt"
- viewBox="0.00 0.00 121.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="127pt" height="28pt"
+ viewBox="0.00 0.00 127.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 117,-24 117,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 123,-24 123,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structtvm_1_1te_1_1TensorDom.html" target="_top" xlink:title="Temporary data structure to store union of bounds of each axis of Tensor. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 113,-19.5 113,-.5 0,-.5"/>
-<text text-anchor="middle" x="56.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::te::TensorDom</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1te_1_1Tensor_1_1Slice.html" target="_top" xlink:title="data structure to represent a slice that fixes first k coordinates. This is used to enable syntax sug...">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 119,-19.5 119,-.5 0,-.5"/>
+<text text-anchor="middle" x="59.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::te::Tensor::Slice</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_194.svg b/docs/reference/api/doxygen/inherit_graph_194.svg
index 9c231a8c6..01ae6638a 100644
--- a/docs/reference/api/doxygen/inherit_graph_194.svg
+++ b/docs/reference/api/doxygen/inherit_graph_194.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="112pt" height="28pt"
- viewBox="0.00 0.00 112.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="121pt" height="28pt"
+ viewBox="0.00 0.00 121.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 108,-24 108,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 117,-24 117,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structtvm_1_1tir_1_1BlockInfo.html" target="_top" xlink:title="The information about a TensorIR block, it contains two categories of information 1) Info on the bloc...">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 104,-19.5 104,-.5 0,-.5"/>
-<text text-anchor="middle" x="52" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::BlockInfo</text>
+<g id="a_node1"><a xlink:href="structtvm_1_1te_1_1TensorDom.html" target="_top" xlink:title="Temporary data structure to store union of bounds of each axis of Tensor. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 113,-19.5 113,-.5 0,-.5"/>
+<text text-anchor="middle" x="56.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::te::TensorDom</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_195.svg b/docs/reference/api/doxygen/inherit_graph_195.svg
index 425db0100..9c231a8c6 100644
--- a/docs/reference/api/doxygen/inherit_graph_195.svg
+++ b/docs/reference/api/doxygen/inherit_graph_195.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="141pt" height="28pt"
- viewBox="0.00 0.00 141.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="112pt" height="28pt"
+ viewBox="0.00 0.00 112.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 137,-24 137,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 108,-24 108,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structtvm_1_1tir_1_1ExprDeepEqual.html" target="_top" xlink:title="Compare two expressions recursively and check if they are equal to each other without var remapping...">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 133,-19.5 133,-.5 0,-.5"/>
-<text text-anchor="middle" x="66.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprDeepEqual</text>
+<g id="a_node1"><a xlink:href="structtvm_1_1tir_1_1BlockInfo.html" target="_top" xlink:title="The information about a TensorIR block, it contains two categories of information 1) Info on the bloc...">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 104,-19.5 104,-.5 0,-.5"/>
+<text text-anchor="middle" x="52" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::BlockInfo</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_196.svg b/docs/reference/api/doxygen/inherit_graph_196.svg
index 27a798cc0..425db0100 100644
--- a/docs/reference/api/doxygen/inherit_graph_196.svg
+++ b/docs/reference/api/doxygen/inherit_graph_196.svg
@@ -4,18 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="124pt" height="39pt"
- viewBox="0.00 0.00 124.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="141pt" height="28pt"
+ viewBox="0.00 0.00 141.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 120,-35 120,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 137,-24 137,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1ExprFunctor.html" target="_top" xlink:title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerfu...">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 116,-30.5 116,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprFunctor</text>
-<text text-anchor="middle" x="58" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; FType &gt;</text>
+<g id="a_node1"><a xlink:href="structtvm_1_1tir_1_1ExprDeepEqual.html" target="_top" xlink:title="Compare two expressions recursively and check if they are equal to each other without var remapping...">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 133,-19.5 133,-.5 0,-.5"/>
+<text text-anchor="middle" x="66.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprDeepEqual</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_197.svg b/docs/reference/api/doxygen/inherit_graph_197.svg
index 48689c359..27a798cc0 100644
--- a/docs/reference/api/doxygen/inherit_graph_197.svg
+++ b/docs/reference/api/doxygen/inherit_graph_197.svg
@@ -4,19 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="139pt" height="50pt"
- viewBox="0.00 0.00 139.00 50.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 46)">
+<svg width="124pt" height="39pt"
+ viewBox="0.00 0.00 124.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-46 135,-46 135,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 120,-35 120,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html" target="_top" xlink:title="tvm::tir::ExprFunctor\l\&lt; R(const PrimExpr &amp;n,\l Args...)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-41.5 131,-41.5 131,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprFunctor</text>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; R(const PrimExpr &amp;n,</text>
-<text text-anchor="middle" x="65.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> Args...)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1ExprFunctor.html" target="_top" xlink:title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerfu...">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 116,-30.5 116,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprFunctor</text>
+<text text-anchor="middle" x="58" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; FType &gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_198.svg b/docs/reference/api/doxygen/inherit_graph_198.svg
index 0df1022f1..48689c359 100644
--- a/docs/reference/api/doxygen/inherit_graph_198.svg
+++ b/docs/reference/api/doxygen/inherit_graph_198.svg
@@ -4,18 +4,19 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="140pt" height="39pt"
- viewBox="0.00 0.00 140.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="139pt" height="50pt"
+ viewBox="0.00 0.00 139.00 50.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 46)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 136,-35 136,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-46 135,-46 135,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1InstructionKindRegEntry.html" target="_top" xlink:title="An entry in the registry of InstructionKind. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 132,-30.5 132,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::InstructionKind</text>
-<text text-anchor="middle" x="66" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">RegEntry</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html" target="_top" xlink:title="tvm::tir::ExprFunctor\l\&lt; R(const PrimExpr &amp;n,\l Args...)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-41.5 131,-41.5 131,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprFunctor</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; R(const PrimExpr &amp;n,</text>
+<text text-anchor="middle" x="65.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> Args...)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_199.svg b/docs/reference/api/doxygen/inherit_graph_199.svg
index dabc7ca0b..0df1022f1 100644
--- a/docs/reference/api/doxygen/inherit_graph_199.svg
+++ b/docs/reference/api/doxygen/inherit_graph_199.svg
@@ -4,17 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="121pt" height="28pt"
- viewBox="0.00 0.00 121.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
+<svg width="140pt" height="39pt"
+ viewBox="0.00 0.00 140.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 117,-24 117,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 136,-35 136,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1LayoutAxis.html" target="_top" xlink:title="tvm::tir::LayoutAxis">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 113,-19.5 113,-.5 0,-.5"/>
-<text text-anchor="middle" x="56.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::LayoutAxis</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1InstructionKindRegEntry.html" target="_top" xlink:title="An entry in the registry of InstructionKind. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 132,-30.5 132,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::InstructionKind</text>
+<text text-anchor="middle" x="66" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">RegEntry</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_200.svg b/docs/reference/api/doxygen/inherit_graph_200.svg
index adffd094d..dabc7ca0b 100644
--- a/docs/reference/api/doxygen/inherit_graph_200.svg
+++ b/docs/reference/api/doxygen/inherit_graph_200.svg
@@ -4,18 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="114pt" height="39pt"
- viewBox="0.00 0.00 114.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="121pt" height="28pt"
+ viewBox="0.00 0.00 121.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 110,-35 110,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 117,-24 117,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1SeqStmt_1_1Flattener.html" target="_top" xlink:title="Helper class to flatten sequence of arguments into Array. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 106,-30.5 106,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::SeqStmt::</text>
-<text text-anchor="middle" x="53" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Flattener</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1LayoutAxis.html" target="_top" xlink:title="tvm::tir::LayoutAxis">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 113,-19.5 113,-.5 0,-.5"/>
+<text text-anchor="middle" x="56.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::LayoutAxis</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_201.svg b/docs/reference/api/doxygen/inherit_graph_201.svg
index 1267141b2..adffd094d 100644
--- a/docs/reference/api/doxygen/inherit_graph_201.svg
+++ b/docs/reference/api/doxygen/inherit_graph_201.svg
@@ -4,18 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="125pt" height="39pt"
- viewBox="0.00 0.00 125.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="114pt" height="39pt"
+ viewBox="0.00 0.00 114.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 121,-35 121,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 110,-35 110,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1StmtFunctor.html" target="_top" xlink:title="Same as ExprFunctor except it is applied on statements. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 117,-30.5 117,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtFunctor</text>
-<text text-anchor="middle" x="58.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; FType &gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1SeqStmt_1_1Flattener.html" target="_top" xlink:title="Helper class to flatten sequence of arguments into Array. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 106,-30.5 106,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::SeqStmt::</text>
+<text text-anchor="middle" x="53" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Flattener</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_202.svg b/docs/reference/api/doxygen/inherit_graph_202.svg
index 5a3723be6..1267141b2 100644
--- a/docs/reference/api/doxygen/inherit_graph_202.svg
+++ b/docs/reference/api/doxygen/inherit_graph_202.svg
@@ -4,19 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="152pt" height="50pt"
- viewBox="0.00 0.00 152.00 50.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 46)">
+<svg width="125pt" height="39pt"
+ viewBox="0.00 0.00 125.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-46 148,-46 148,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 121,-35 121,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8_01args_08_4.html" target="_top" xlink:title="tvm::tir::StmtFunctor\l\&lt; R(const Stmt &amp;n, Args...\l args)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-41.5 144,-41.5 144,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtFunctor</text>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; R(const Stmt &amp;n, Args...</text>
-<text text-anchor="middle" x="72" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> args)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1StmtFunctor.html" target="_top" xlink:title="Same as ExprFunctor except it is applied on statements. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 117,-30.5 117,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtFunctor</text>
+<text text-anchor="middle" x="58.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; FType &gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_203.svg b/docs/reference/api/doxygen/inherit_graph_203.svg
index 94abe359c..5a3723be6 100644
--- a/docs/reference/api/doxygen/inherit_graph_203.svg
+++ b/docs/reference/api/doxygen/inherit_graph_203.svg
@@ -4,81 +4,21 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="512pt" height="88pt"
- viewBox="0.00 0.00 512.00 88.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 84)">
+<svg width="152pt" height="50pt"
+ viewBox="0.00 0.00 152.00 50.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 46)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-84 508,-84 508,4 -4,4"/>
-<!-- Node1251 -->
-<g id="node1" class="node">
-<title>Node1251</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1ExprFunctor.html" target="_top" xlink:title="tvm::tir::ExprFunctor\l\&lt; PrimExpr(const PrimExpr &amp;)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-49.5 0,-79.5 172,-79.5 172,-49.5 0,-49.5"/>
-<text text-anchor="start" x="8" y="-67.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprFunctor</text>
-<text text-anchor="middle" x="86" y="-56.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; PrimExpr(const PrimExpr &amp;)&gt;</text>
-</a>
-</g>
-</g>
-<!-- Node1252 -->
-<g id="node2" class="node">
-<title>Node1252</title>
-<g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1ExprMutator.html" target="_top" xlink:title="ExprMutator that mutates expressions. ">
-<polygon fill="#ffffff" stroke="#000000" points="208.5,-52 208.5,-71 326.5,-71 326.5,-52 208.5,-52"/>
-<text text-anchor="middle" x="267.5" y="-59" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprMutator</text>
-</a>
-</g>
-</g>
-<!-- Node1251&#45;&gt;Node1252 -->
-<g id="edge1" class="edge">
-<title>Node1251&#45;&gt;Node1252</title>
-<path fill="none" stroke="#006400" d="M182.2922,-62.9084C191.1922,-62.7613 200.0208,-62.6154 208.434,-62.4763"/>
-<polygon fill="#006400" stroke="#006400" points="182.1726,-59.4098 172.2319,-63.0747 182.2884,-66.4088 182.1726,-59.4098"/>
-</g>
-<!-- Node2 -->
-<g id="node3" class="node">
-<title>Node2</title>
-<g id="a_node3"><a xlink:href="classtvm_1_1tir_1_1StmtExprMutator.html" target="_top" xlink:title="Mutator that recursively mutates stmts and exprs on them. ">
-<polygon fill="#ffffff" stroke="#000000" points="363,-30 363,-49 504,-49 504,-30 363,-30"/>
-<text text-anchor="middle" x="433.5" y="-37" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtExprMutator</text>
-</a>
-</g>
-</g>
-<!-- Node1252&#45;&gt;Node2 -->
-<g id="edge2" class="edge">
-<title>Node1252&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M336.9096,-52.3011C345.4815,-51.1651 354.2522,-50.0027 362.8425,-48.8642"/>
-<polygon fill="#191970" stroke="#191970" points="336.4144,-48.8361 326.9609,-53.6196 337.3341,-55.7754 336.4144,-48.8361"/>
-</g>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-46 148,-46 148,4 -4,4"/>
 <!-- Node0 -->
-<g id="node4" class="node">
+<g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node4"><a xlink:href="classtvm_1_1tir_1_1StmtFunctor.html" target="_top" xlink:title="tvm::tir::StmtFunctor\l\&lt; Stmt(const Stmt &amp;)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="21,-.5 21,-30.5 151,-30.5 151,-.5 21,-.5"/>
-<text text-anchor="start" x="29" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtFunctor</text>
-<text text-anchor="middle" x="86" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; Stmt(const Stmt &amp;)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8_01args_08_4.html" target="_top" xlink:title="tvm::tir::StmtFunctor\l\&lt; R(const Stmt &amp;n, Args...\l args)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-41.5 144,-41.5 144,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtFunctor</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; R(const Stmt &amp;n, Args...</text>
+<text text-anchor="middle" x="72" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> args)&gt;</text>
 </a>
 </g>
 </g>
-<!-- Node1 -->
-<g id="node5" class="node">
-<title>Node1</title>
-<g id="a_node5"><a xlink:href="classtvm_1_1tir_1_1StmtMutator.html" target="_top" xlink:title="StmtMutator that mutates the statements. ">
-<polygon fill="#ffffff" stroke="#000000" points="208,-11 208,-30 327,-30 327,-11 208,-11"/>
-<text text-anchor="middle" x="267.5" y="-18" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtMutator</text>
-</a>
-</g>
-</g>
-<!-- Node0&#45;&gt;Node1 -->
-<g id="edge3" class="edge">
-<title>Node0&#45;&gt;Node1</title>
-<path fill="none" stroke="#006400" d="M161.1234,-17.5695C176.6774,-17.998 192.863,-18.4439 207.7775,-18.8548"/>
-<polygon fill="#006400" stroke="#006400" points="161.1056,-14.0678 151.013,-17.291 160.9128,-21.0651 161.1056,-14.0678"/>
-</g>
-<!-- Node1&#45;&gt;Node2 -->
-<g id="edge4" class="edge">
-<title>Node1&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M337.5536,-28.5182C345.9464,-29.4788 354.5203,-30.4602 362.9205,-31.4216"/>
-<polygon fill="#191970" stroke="#191970" points="337.763,-25.0194 327.4298,-27.3594 336.9669,-31.974 337.763,-25.0194"/>
-</g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/inherit_graph_204.svg b/docs/reference/api/doxygen/inherit_graph_204.svg
index 4af8ef7da..6c44a450a 100644
--- a/docs/reference/api/doxygen/inherit_graph_204.svg
+++ b/docs/reference/api/doxygen/inherit_graph_204.svg
@@ -4,81 +4,81 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="477pt" height="88pt"
- viewBox="0.00 0.00 477.00 88.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="512pt" height="88pt"
+ viewBox="0.00 0.00 512.00 88.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 84)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-84 473,-84 473,4 -4,4"/>
-<!-- Node1244 -->
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-84 508,-84 508,4 -4,4"/>
+<!-- Node1252 -->
 <g id="node1" class="node">
-<title>Node1244</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1ExprFunctor.html" target="_top" xlink:title="tvm::tir::ExprFunctor\l\&lt; void(const PrimExpr &amp;)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-49.5 0,-79.5 149,-79.5 149,-49.5 0,-49.5"/>
+<title>Node1252</title>
+<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1ExprFunctor.html" target="_top" xlink:title="tvm::tir::ExprFunctor\l\&lt; PrimExpr(const PrimExpr &amp;)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-49.5 0,-79.5 172,-79.5 172,-49.5 0,-49.5"/>
 <text text-anchor="start" x="8" y="-67.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprFunctor</text>
-<text text-anchor="middle" x="74.5" y="-56.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; void(const PrimExpr &amp;)&gt;</text>
+<text text-anchor="middle" x="86" y="-56.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; PrimExpr(const PrimExpr &amp;)&gt;</text>
 </a>
 </g>
 </g>
-<!-- Node1245 -->
+<!-- Node1253 -->
 <g id="node2" class="node">
-<title>Node1245</title>
-<g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1ExprVisitor.html" target="_top" xlink:title="ExprVisitor. ">
-<polygon fill="#ffffff" stroke="#000000" points="185.5,-52 185.5,-71 297.5,-71 297.5,-52 185.5,-52"/>
-<text text-anchor="middle" x="241.5" y="-59" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprVisitor</text>
+<title>Node1253</title>
+<g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1ExprMutator.html" target="_top" xlink:title="ExprMutator that mutates expressions. ">
+<polygon fill="#ffffff" stroke="#000000" points="208.5,-52 208.5,-71 326.5,-71 326.5,-52 208.5,-52"/>
+<text text-anchor="middle" x="267.5" y="-59" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprMutator</text>
 </a>
 </g>
 </g>
-<!-- Node1244&#45;&gt;Node1245 -->
+<!-- Node1252&#45;&gt;Node1253 -->
 <g id="edge1" class="edge">
-<title>Node1244&#45;&gt;Node1245</title>
-<path fill="none" stroke="#191970" d="M159.2165,-62.9781C168.0933,-62.8187 176.9518,-62.6595 185.3905,-62.508"/>
-<polygon fill="#191970" stroke="#191970" points="159.1434,-59.4788 149.2079,-63.1579 159.2692,-66.4777 159.1434,-59.4788"/>
+<title>Node1252&#45;&gt;Node1253</title>
+<path fill="none" stroke="#006400" d="M182.2922,-62.9084C191.1922,-62.7613 200.0208,-62.6154 208.434,-62.4763"/>
+<polygon fill="#006400" stroke="#006400" points="182.1726,-59.4098 172.2319,-63.0747 182.2884,-66.4088 182.1726,-59.4098"/>
 </g>
 <!-- Node2 -->
 <g id="node3" class="node">
 <title>Node2</title>
-<g id="a_node3"><a xlink:href="classtvm_1_1tir_1_1StmtExprVisitor.html" target="_top" xlink:title="Visitor that recursively visit stmts and exprs on them. ">
-<polygon fill="#ffffff" stroke="#000000" points="334,-30 334,-49 469,-49 469,-30 334,-30"/>
-<text text-anchor="middle" x="401.5" y="-37" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtExprVisitor</text>
+<g id="a_node3"><a xlink:href="classtvm_1_1tir_1_1StmtExprMutator.html" target="_top" xlink:title="Mutator that recursively mutates stmts and exprs on them. ">
+<polygon fill="#ffffff" stroke="#000000" points="363,-30 363,-49 504,-49 504,-30 363,-30"/>
+<text text-anchor="middle" x="433.5" y="-37" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtExprMutator</text>
 </a>
 </g>
 </g>
-<!-- Node1245&#45;&gt;Node2 -->
+<!-- Node1253&#45;&gt;Node2 -->
 <g id="edge2" class="edge">
-<title>Node1245&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M307.8508,-52.3768C316.4242,-51.1979 325.2131,-49.9895 333.8078,-48.8077"/>
-<polygon fill="#191970" stroke="#191970" points="307.34,-48.914 297.91,-53.7436 308.2936,-55.8487 307.34,-48.914"/>
+<title>Node1253&#45;&gt;Node2</title>
+<path fill="none" stroke="#191970" d="M336.9096,-52.3011C345.4815,-51.1651 354.2522,-50.0027 362.8425,-48.8642"/>
+<polygon fill="#191970" stroke="#191970" points="336.4144,-48.8361 326.9609,-53.6196 337.3341,-55.7754 336.4144,-48.8361"/>
 </g>
 <!-- Node0 -->
 <g id="node4" class="node">
 <title>Node0</title>
-<g id="a_node4"><a xlink:href="classtvm_1_1tir_1_1StmtFunctor.html" target="_top" xlink:title="tvm::tir::StmtFunctor\l\&lt; void(const Stmt &amp;)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="10.5,-.5 10.5,-30.5 138.5,-30.5 138.5,-.5 10.5,-.5"/>
-<text text-anchor="start" x="18.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtFunctor</text>
-<text text-anchor="middle" x="74.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; void(const Stmt &amp;)&gt;</text>
+<g id="a_node4"><a xlink:href="classtvm_1_1tir_1_1StmtFunctor.html" target="_top" xlink:title="tvm::tir::StmtFunctor\l\&lt; Stmt(const Stmt &amp;)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="21,-.5 21,-30.5 151,-30.5 151,-.5 21,-.5"/>
+<text text-anchor="start" x="29" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtFunctor</text>
+<text text-anchor="middle" x="86" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; Stmt(const Stmt &amp;)&gt;</text>
 </a>
 </g>
 </g>
 <!-- Node1 -->
 <g id="node5" class="node">
 <title>Node1</title>
-<g id="a_node5"><a xlink:href="classtvm_1_1tir_1_1StmtVisitor.html" target="_top" xlink:title="StmtVisitor. ">
-<polygon fill="#ffffff" stroke="#000000" points="185,-11 185,-30 298,-30 298,-11 185,-11"/>
-<text text-anchor="middle" x="241.5" y="-18" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtVisitor</text>
+<g id="a_node5"><a xlink:href="classtvm_1_1tir_1_1StmtMutator.html" target="_top" xlink:title="StmtMutator that mutates the statements. ">
+<polygon fill="#ffffff" stroke="#000000" points="208,-11 208,-30 327,-30 327,-11 208,-11"/>
+<text text-anchor="middle" x="267.5" y="-18" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtMutator</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node1 -->
 <g id="edge3" class="edge">
 <title>Node0&#45;&gt;Node1</title>
-<path fill="none" stroke="#006400" d="M148.7178,-17.7221C160.8779,-18.0862 173.3038,-18.4582 184.9469,-18.8068"/>
-<polygon fill="#006400" stroke="#006400" points="148.6892,-14.2198 138.589,-17.4188 148.4797,-21.2166 148.6892,-14.2198"/>
+<path fill="none" stroke="#006400" d="M161.1234,-17.5695C176.6774,-17.998 192.863,-18.4439 207.7775,-18.8548"/>
+<polygon fill="#006400" stroke="#006400" points="161.1056,-14.0678 151.013,-17.291 160.9128,-21.0651 161.1056,-14.0678"/>
 </g>
 <!-- Node1&#45;&gt;Node2 -->
 <g id="edge4" class="edge">
 <title>Node1&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M308.4798,-28.4539C316.8769,-29.451 325.4706,-30.4715 333.8775,-31.4698"/>
-<polygon fill="#191970" stroke="#191970" points="308.7035,-24.9559 298.3605,-27.2522 307.878,-31.9071 308.7035,-24.9559"/>
+<path fill="none" stroke="#191970" d="M337.5536,-28.5182C345.9464,-29.4788 354.5203,-30.4602 362.9205,-31.4216"/>
+<polygon fill="#191970" stroke="#191970" points="337.763,-25.0194 327.4298,-27.3594 336.9669,-31.974 337.763,-25.0194"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/inherit_graph_205.svg b/docs/reference/api/doxygen/inherit_graph_205.svg
index c51fce95b..4122c825d 100644
--- a/docs/reference/api/doxygen/inherit_graph_205.svg
+++ b/docs/reference/api/doxygen/inherit_graph_205.svg
@@ -4,20 +4,81 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="121pt" height="39pt"
- viewBox="0.00 0.00 121.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="477pt" height="88pt"
+ viewBox="0.00 0.00 477.00 88.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 84)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 117,-35 117,4 -4,4"/>
-<!-- Node0 -->
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-84 473,-84 473,4 -4,4"/>
+<!-- Node1245 -->
 <g id="node1" class="node">
+<title>Node1245</title>
+<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1ExprFunctor.html" target="_top" xlink:title="tvm::tir::ExprFunctor\l\&lt; void(const PrimExpr &amp;)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-49.5 0,-79.5 149,-79.5 149,-49.5 0,-49.5"/>
+<text text-anchor="start" x="8" y="-67.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprFunctor</text>
+<text text-anchor="middle" x="74.5" y="-56.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; void(const PrimExpr &amp;)&gt;</text>
+</a>
+</g>
+</g>
+<!-- Node1246 -->
+<g id="node2" class="node">
+<title>Node1246</title>
+<g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1ExprVisitor.html" target="_top" xlink:title="ExprVisitor. ">
+<polygon fill="#ffffff" stroke="#000000" points="185.5,-52 185.5,-71 297.5,-71 297.5,-52 185.5,-52"/>
+<text text-anchor="middle" x="241.5" y="-59" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprVisitor</text>
+</a>
+</g>
+</g>
+<!-- Node1245&#45;&gt;Node1246 -->
+<g id="edge1" class="edge">
+<title>Node1245&#45;&gt;Node1246</title>
+<path fill="none" stroke="#191970" d="M159.2165,-62.9781C168.0933,-62.8187 176.9518,-62.6595 185.3905,-62.508"/>
+<polygon fill="#191970" stroke="#191970" points="159.1434,-59.4788 149.2079,-63.1579 159.2692,-66.4777 159.1434,-59.4788"/>
+</g>
+<!-- Node2 -->
+<g id="node3" class="node">
+<title>Node2</title>
+<g id="a_node3"><a xlink:href="classtvm_1_1tir_1_1StmtExprVisitor.html" target="_top" xlink:title="Visitor that recursively visit stmts and exprs on them. ">
+<polygon fill="#ffffff" stroke="#000000" points="334,-30 334,-49 469,-49 469,-30 334,-30"/>
+<text text-anchor="middle" x="401.5" y="-37" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtExprVisitor</text>
+</a>
+</g>
+</g>
+<!-- Node1246&#45;&gt;Node2 -->
+<g id="edge2" class="edge">
+<title>Node1246&#45;&gt;Node2</title>
+<path fill="none" stroke="#191970" d="M307.8508,-52.3768C316.4242,-51.1979 325.2131,-49.9895 333.8078,-48.8077"/>
+<polygon fill="#191970" stroke="#191970" points="307.34,-48.914 297.91,-53.7436 308.2936,-55.8487 307.34,-48.914"/>
+</g>
+<!-- Node0 -->
+<g id="node4" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1usmp_1_1algo_1_1GreedyBase.html" target="_top" xlink:title="This is the base class for Greedy Algorithms where the sorting is specialized in the extended classes...">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 113,-30.5 113,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::usmp::algo</text>
-<text text-anchor="middle" x="56.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::GreedyBase</text>
+<g id="a_node4"><a xlink:href="classtvm_1_1tir_1_1StmtFunctor.html" target="_top" xlink:title="tvm::tir::StmtFunctor\l\&lt; void(const Stmt &amp;)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="10.5,-.5 10.5,-30.5 138.5,-30.5 138.5,-.5 10.5,-.5"/>
+<text text-anchor="start" x="18.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtFunctor</text>
+<text text-anchor="middle" x="74.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; void(const Stmt &amp;)&gt;</text>
 </a>
 </g>
 </g>
+<!-- Node1 -->
+<g id="node5" class="node">
+<title>Node1</title>
+<g id="a_node5"><a xlink:href="classtvm_1_1tir_1_1StmtVisitor.html" target="_top" xlink:title="StmtVisitor. ">
+<polygon fill="#ffffff" stroke="#000000" points="185,-11 185,-30 298,-30 298,-11 185,-11"/>
+<text text-anchor="middle" x="241.5" y="-18" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtVisitor</text>
+</a>
+</g>
+</g>
+<!-- Node0&#45;&gt;Node1 -->
+<g id="edge3" class="edge">
+<title>Node0&#45;&gt;Node1</title>
+<path fill="none" stroke="#006400" d="M148.7178,-17.7221C160.8779,-18.0862 173.3038,-18.4582 184.9469,-18.8068"/>
+<polygon fill="#006400" stroke="#006400" points="148.6892,-14.2198 138.589,-17.4188 148.4797,-21.2166 148.6892,-14.2198"/>
+</g>
+<!-- Node1&#45;&gt;Node2 -->
+<g id="edge4" class="edge">
+<title>Node1&#45;&gt;Node2</title>
+<path fill="none" stroke="#191970" d="M308.4798,-28.4539C316.8769,-29.451 325.4706,-30.4715 333.8775,-31.4698"/>
+<polygon fill="#191970" stroke="#191970" points="308.7035,-24.9559 298.3605,-27.2522 307.878,-31.9071 308.7035,-24.9559"/>
+</g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/inherit_graph_206.svg b/docs/reference/api/doxygen/inherit_graph_206.svg
index 696913cc6..c51fce95b 100644
--- a/docs/reference/api/doxygen/inherit_graph_206.svg
+++ b/docs/reference/api/doxygen/inherit_graph_206.svg
@@ -4,21 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="153pt" height="72pt"
- viewBox="0.00 0.00 153.00 72.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 68)">
+<svg width="121pt" height="39pt"
+ viewBox="0.00 0.00 121.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-68 149,-68 149,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 117,-35 117,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1TypedEnvFunc.html" target="_top" xlink:title="tvm::TypedEnvFunc\&lt;\l bool(const Array\&lt; Type\l \&gt; &amp;args, int num_inputs,\l const Attrs &amp;attrs, const\l TypeReporter &amp;reporter)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-63.5 145,-63.5 145,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypedEnvFunc&lt;</text>
-<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> bool(const Array&lt; Type</text>
-<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &gt; &amp;args, int num_inputs,</text>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> const Attrs &amp;attrs, const</text>
-<text text-anchor="middle" x="72.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> TypeReporter &amp;reporter)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1tir_1_1usmp_1_1algo_1_1GreedyBase.html" target="_top" xlink:title="This is the base class for Greedy Algorithms where the sorting is specialized in the extended classes...">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 113,-30.5 113,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::usmp::algo</text>
+<text text-anchor="middle" x="56.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::GreedyBase</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_207.svg b/docs/reference/api/doxygen/inherit_graph_207.svg
index d6c8914de..696913cc6 100644
--- a/docs/reference/api/doxygen/inherit_graph_207.svg
+++ b/docs/reference/api/doxygen/inherit_graph_207.svg
@@ -4,18 +4,21 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="127pt" height="39pt"
- viewBox="0.00 0.00 127.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="153pt" height="72pt"
+ viewBox="0.00 0.00 153.00 72.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 68)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 123,-35 123,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-68 149,-68 149,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1TypedEnvFunc.html" target="_top" xlink:title="Please refer to TypedEnvFunc&lt;R(Args..)&gt;. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 119,-30.5 119,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypedEnvFunc&lt;</text>
-<text text-anchor="middle" x="59.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> FType &gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1TypedEnvFunc.html" target="_top" xlink:title="tvm::TypedEnvFunc\&lt;\l bool(const Array\&lt; Type\l \&gt; &amp;args, int num_inputs,\l const Attrs &amp;attrs, const\l TypeReporter &amp;reporter)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-63.5 145,-63.5 145,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypedEnvFunc&lt;</text>
+<text text-anchor="start" x="8" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> bool(const Array&lt; Type</text>
+<text text-anchor="start" x="8" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> &gt; &amp;args, int num_inputs,</text>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> const Attrs &amp;attrs, const</text>
+<text text-anchor="middle" x="72.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> TypeReporter &amp;reporter)&gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_208.svg b/docs/reference/api/doxygen/inherit_graph_208.svg
index 79483a1c4..d6c8914de 100644
--- a/docs/reference/api/doxygen/inherit_graph_208.svg
+++ b/docs/reference/api/doxygen/inherit_graph_208.svg
@@ -4,17 +4,18 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="158pt" height="28pt"
- viewBox="0.00 0.00 158.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
+<svg width="127pt" height="39pt"
+ viewBox="0.00 0.00 127.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 154,-24 154,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 123,-35 123,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1TypeFunctor.html" target="_top" xlink:title="tvm::TypeFunctor\&lt; FType \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 150,-19.5 150,-.5 0,-.5"/>
-<text text-anchor="middle" x="75" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeFunctor&lt; FType &gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1TypedEnvFunc.html" target="_top" xlink:title="Please refer to TypedEnvFunc&lt;R(Args..)&gt;. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 119,-30.5 119,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypedEnvFunc&lt;</text>
+<text text-anchor="middle" x="59.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> FType &gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_209.svg b/docs/reference/api/doxygen/inherit_graph_209.svg
index ef7091768..79483a1c4 100644
--- a/docs/reference/api/doxygen/inherit_graph_209.svg
+++ b/docs/reference/api/doxygen/inherit_graph_209.svg
@@ -4,18 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="145pt" height="39pt"
- viewBox="0.00 0.00 145.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
+<svg width="158pt" height="28pt"
+ viewBox="0.00 0.00 158.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 141,-35 141,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 154,-24 154,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html" target="_top" xlink:title="tvm::TypeFunctor\&lt; R\l(const Type &amp;n, Args...)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 137,-30.5 137,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeFunctor&lt; R</text>
-<text text-anchor="middle" x="68.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">(const Type &amp;n, Args...)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1TypeFunctor.html" target="_top" xlink:title="tvm::TypeFunctor\&lt; FType \&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 150,-19.5 150,-.5 0,-.5"/>
+<text text-anchor="middle" x="75" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeFunctor&lt; FType &gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_210.svg b/docs/reference/api/doxygen/inherit_graph_210.svg
index ca464afe2..ef7091768 100644
--- a/docs/reference/api/doxygen/inherit_graph_210.svg
+++ b/docs/reference/api/doxygen/inherit_graph_210.svg
@@ -4,35 +4,20 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="280pt" height="39pt"
- viewBox="0.00 0.00 280.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="145pt" height="39pt"
+ viewBox="0.00 0.00 145.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 276,-35 276,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 141,-35 141,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1TypeFunctor.html" target="_top" xlink:title="tvm::TypeFunctor\&lt; Type\l(const Type &amp;n)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 134,-30.5 134,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeFunctor&lt; Type</text>
-<text text-anchor="middle" x="67" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">(const Type &amp;n)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html" target="_top" xlink:title="tvm::TypeFunctor\&lt; R\l(const Type &amp;n, Args...)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 137,-30.5 137,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeFunctor&lt; R</text>
+<text text-anchor="middle" x="68.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">(const Type &amp;n, Args...)&gt;</text>
 </a>
 </g>
 </g>
-<!-- Node1 -->
-<g id="node2" class="node">
-<title>Node1</title>
-<g id="a_node2"><a xlink:href="classtvm_1_1TypeMutator.html" target="_top" xlink:title="TypeMutator that mutates expressions. ">
-<polygon fill="#ffffff" stroke="#000000" points="170,-6 170,-25 272,-25 272,-6 170,-6"/>
-<text text-anchor="middle" x="221" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeMutator</text>
-</a>
-</g>
-</g>
-<!-- Node0&#45;&gt;Node1 -->
-<g id="edge1" class="edge">
-<title>Node0&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M144.2712,-15.5C152.9574,-15.5 161.6391,-15.5 169.874,-15.5"/>
-<polygon fill="#191970" stroke="#191970" points="144.0987,-12.0001 134.0987,-15.5 144.0986,-19.0001 144.0987,-12.0001"/>
-</g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/inherit_graph_211.svg b/docs/reference/api/doxygen/inherit_graph_211.svg
index cd45c0e3c..ca464afe2 100644
--- a/docs/reference/api/doxygen/inherit_graph_211.svg
+++ b/docs/reference/api/doxygen/inherit_graph_211.svg
@@ -4,35 +4,35 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="272pt" height="39pt"
- viewBox="0.00 0.00 272.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="280pt" height="39pt"
+ viewBox="0.00 0.00 280.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 268,-35 268,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 276,-35 276,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1TypeFunctor.html" target="_top" xlink:title="tvm::TypeFunctor\&lt; void\l(const Type &amp;n)\&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 132,-30.5 132,-.5 0,-.5"/>
-<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeFunctor&lt; void</text>
-<text text-anchor="middle" x="66" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">(const Type &amp;n)&gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1TypeFunctor.html" target="_top" xlink:title="tvm::TypeFunctor\&lt; Type\l(const Type &amp;n)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 134,-30.5 134,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeFunctor&lt; Type</text>
+<text text-anchor="middle" x="67" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">(const Type &amp;n)&gt;</text>
 </a>
 </g>
 </g>
 <!-- Node1 -->
 <g id="node2" class="node">
 <title>Node1</title>
-<g id="a_node2"><a xlink:href="classtvm_1_1TypeVisitor.html" target="_top" xlink:title="A type visitor that recursively visit types. ">
-<polygon fill="#ffffff" stroke="#000000" points="168,-6 168,-25 264,-25 264,-6 168,-6"/>
-<text text-anchor="middle" x="216" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeVisitor</text>
+<g id="a_node2"><a xlink:href="classtvm_1_1TypeMutator.html" target="_top" xlink:title="TypeMutator that mutates expressions. ">
+<polygon fill="#ffffff" stroke="#000000" points="170,-6 170,-25 272,-25 272,-6 170,-6"/>
+<text text-anchor="middle" x="221" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeMutator</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node1 -->
 <g id="edge1" class="edge">
 <title>Node0&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M142.338,-15.5C150.9637,-15.5 159.5657,-15.5 167.6875,-15.5"/>
-<polygon fill="#191970" stroke="#191970" points="142.2289,-12.0001 132.2288,-15.5 142.2288,-19.0001 142.2289,-12.0001"/>
+<path fill="none" stroke="#191970" d="M144.2712,-15.5C152.9574,-15.5 161.6391,-15.5 169.874,-15.5"/>
+<polygon fill="#191970" stroke="#191970" points="144.0987,-12.0001 134.0987,-15.5 144.0986,-19.0001 144.0987,-12.0001"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/inherit_graph_212.svg b/docs/reference/api/doxygen/inherit_graph_212.svg
index 6c1516dd2..cd45c0e3c 100644
--- a/docs/reference/api/doxygen/inherit_graph_212.svg
+++ b/docs/reference/api/doxygen/inherit_graph_212.svg
@@ -4,19 +4,35 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="146pt" height="28pt"
- viewBox="0.00 0.00 146.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
+<svg width="272pt" height="39pt"
+ viewBox="0.00 0.00 272.00 39.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 142,-24 142,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 268,-35 268,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1VirtualDeviceCache.html" target="_top" xlink:title="A cache of VirtualDevices. This can be used: ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 138,-19.5 138,-.5 0,-.5"/>
-<text text-anchor="middle" x="69" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::VirtualDeviceCache</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1TypeFunctor.html" target="_top" xlink:title="tvm::TypeFunctor\&lt; void\l(const Type &amp;n)\&gt;">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-30.5 132,-30.5 132,-.5 0,-.5"/>
+<text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeFunctor&lt; void</text>
+<text text-anchor="middle" x="66" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">(const Type &amp;n)&gt;</text>
 </a>
 </g>
 </g>
+<!-- Node1 -->
+<g id="node2" class="node">
+<title>Node1</title>
+<g id="a_node2"><a xlink:href="classtvm_1_1TypeVisitor.html" target="_top" xlink:title="A type visitor that recursively visit types. ">
+<polygon fill="#ffffff" stroke="#000000" points="168,-6 168,-25 264,-25 264,-6 168,-6"/>
+<text text-anchor="middle" x="216" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::TypeVisitor</text>
+</a>
+</g>
+</g>
+<!-- Node0&#45;&gt;Node1 -->
+<g id="edge1" class="edge">
+<title>Node0&#45;&gt;Node1</title>
+<path fill="none" stroke="#191970" d="M142.338,-15.5C150.9637,-15.5 159.5657,-15.5 167.6875,-15.5"/>
+<polygon fill="#191970" stroke="#191970" points="142.2289,-12.0001 132.2288,-15.5 142.2288,-19.0001 142.2289,-12.0001"/>
+</g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/inherit_graph_213.svg b/docs/reference/api/doxygen/inherit_graph_213.svg
index 8a792eafd..6c1516dd2 100644
--- a/docs/reference/api/doxygen/inherit_graph_213.svg
+++ b/docs/reference/api/doxygen/inherit_graph_213.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="156pt" height="28pt"
- viewBox="0.00 0.00 156.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="146pt" height="28pt"
+ viewBox="0.00 0.00 146.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 152,-24 152,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 142,-24 142,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="classtvm_1_1With.html" target="_top" xlink:title="RAII wrapper function to enter and exit a context object similar to python&#39;s with syntax...">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 148,-19.5 148,-.5 0,-.5"/>
-<text text-anchor="middle" x="74" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::With&lt; ContextType &gt;</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1VirtualDeviceCache.html" target="_top" xlink:title="A cache of VirtualDevices. This can be used: ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 138,-19.5 138,-.5 0,-.5"/>
+<text text-anchor="middle" x="69" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::VirtualDeviceCache</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_214.svg b/docs/reference/api/doxygen/inherit_graph_214.svg
index 63cdf46e3..8a792eafd 100644
--- a/docs/reference/api/doxygen/inherit_graph_214.svg
+++ b/docs/reference/api/doxygen/inherit_graph_214.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="111pt" height="28pt"
- viewBox="0.00 0.00 111.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="156pt" height="28pt"
+ viewBox="0.00 0.00 156.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 107,-24 107,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 152,-24 152,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structtvm__workspace__t.html" target="_top" xlink:title="tvm_workspace_t">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 103,-19.5 103,-.5 0,-.5"/>
-<text text-anchor="middle" x="51.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm_workspace_t</text>
+<g id="a_node1"><a xlink:href="classtvm_1_1With.html" target="_top" xlink:title="RAII wrapper function to enter and exit a context object similar to python&#39;s with syntax...">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 148,-19.5 148,-.5 0,-.5"/>
+<text text-anchor="middle" x="74" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::With&lt; ContextType &gt;</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_215.svg b/docs/reference/api/doxygen/inherit_graph_215.svg
index dfb044798..63cdf46e3 100644
--- a/docs/reference/api/doxygen/inherit_graph_215.svg
+++ b/docs/reference/api/doxygen/inherit_graph_215.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="105pt" height="28pt"
- viewBox="0.00 0.00 105.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="111pt" height="28pt"
+ viewBox="0.00 0.00 111.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 101,-24 101,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 107,-24 107,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMAotExecutor.html" target="_top" xlink:title="TVMAotExecutor">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 97,-19.5 97,-.5 0,-.5"/>
-<text text-anchor="middle" x="48.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMAotExecutor</text>
+<g id="a_node1"><a xlink:href="structtvm__workspace__t.html" target="_top" xlink:title="tvm_workspace_t">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 103,-19.5 103,-.5 0,-.5"/>
+<text text-anchor="middle" x="51.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm_workspace_t</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_216.svg b/docs/reference/api/doxygen/inherit_graph_216.svg
index 6be429009..dfb044798 100644
--- a/docs/reference/api/doxygen/inherit_graph_216.svg
+++ b/docs/reference/api/doxygen/inherit_graph_216.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="67pt" height="28pt"
- viewBox="0.00 0.00 67.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="105pt" height="28pt"
+ viewBox="0.00 0.00 105.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 63,-24 63,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 101,-24 101,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMArgs.html" target="_top" xlink:title="TVMArgs">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 59,-19.5 59,-.5 0,-.5"/>
-<text text-anchor="middle" x="29.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMArgs</text>
+<g id="a_node1"><a xlink:href="structTVMAotExecutor.html" target="_top" xlink:title="TVMAotExecutor">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 97,-19.5 97,-.5 0,-.5"/>
+<text text-anchor="middle" x="48.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMAotExecutor</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_217.svg b/docs/reference/api/doxygen/inherit_graph_217.svg
index 1ba163f13..6be429009 100644
--- a/docs/reference/api/doxygen/inherit_graph_217.svg
+++ b/docs/reference/api/doxygen/inherit_graph_217.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="94pt" height="28pt"
- viewBox="0.00 0.00 94.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="67pt" height="28pt"
+ viewBox="0.00 0.00 67.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 90,-24 90,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 63,-24 63,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMByteArray.html" target="_top" xlink:title="Byte array type used to pass in byte array When kTVMBytes is used as data type. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 86,-19.5 86,-.5 0,-.5"/>
-<text text-anchor="middle" x="43" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMByteArray</text>
+<g id="a_node1"><a xlink:href="structTVMArgs.html" target="_top" xlink:title="TVMArgs">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 59,-19.5 59,-.5 0,-.5"/>
+<text text-anchor="middle" x="29.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMArgs</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_218.svg b/docs/reference/api/doxygen/inherit_graph_218.svg
index c25fe4aca..1ba163f13 100644
--- a/docs/reference/api/doxygen/inherit_graph_218.svg
+++ b/docs/reference/api/doxygen/inherit_graph_218.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="108pt" height="28pt"
- viewBox="0.00 0.00 108.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="94pt" height="28pt"
+ viewBox="0.00 0.00 94.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 104,-24 104,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 90,-24 90,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMConstantInfo.html" target="_top" xlink:title="Describes one constant argument to run_model. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 100,-19.5 100,-.5 0,-.5"/>
-<text text-anchor="middle" x="50" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMConstantInfo</text>
+<g id="a_node1"><a xlink:href="structTVMByteArray.html" target="_top" xlink:title="Byte array type used to pass in byte array When kTVMBytes is used as data type. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 86,-19.5 86,-.5 0,-.5"/>
+<text text-anchor="middle" x="43" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMByteArray</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_219.svg b/docs/reference/api/doxygen/inherit_graph_219.svg
index ea3723655..c25fe4aca 100644
--- a/docs/reference/api/doxygen/inherit_graph_219.svg
+++ b/docs/reference/api/doxygen/inherit_graph_219.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="107pt" height="28pt"
- viewBox="0.00 0.00 107.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="108pt" height="28pt"
+ viewBox="0.00 0.00 108.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 103,-24 103,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 104,-24 104,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMFuncRegistry.html" target="_top" xlink:title="A data structure that facilitates function lookup by C&#45;string name. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 99,-19.5 99,-.5 0,-.5"/>
-<text text-anchor="middle" x="49.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMFuncRegistry</text>
+<g id="a_node1"><a xlink:href="structTVMConstantInfo.html" target="_top" xlink:title="Describes one constant argument to run_model. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 100,-19.5 100,-.5 0,-.5"/>
+<text text-anchor="middle" x="50" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMConstantInfo</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_220.svg b/docs/reference/api/doxygen/inherit_graph_220.svg
index ceb0f841a..ea3723655 100644
--- a/docs/reference/api/doxygen/inherit_graph_220.svg
+++ b/docs/reference/api/doxygen/inherit_graph_220.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="165pt" height="28pt"
- viewBox="0.00 0.00 165.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="107pt" height="28pt"
+ viewBox="0.00 0.00 107.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 161,-24 161,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 103,-24 103,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMGraphExecutorGraphAttr.html" target="_top" xlink:title="TVMGraphExecutorGraphAttr">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 157,-19.5 157,-.5 0,-.5"/>
-<text text-anchor="middle" x="78.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMGraphExecutorGraphAttr</text>
+<g id="a_node1"><a xlink:href="structTVMFuncRegistry.html" target="_top" xlink:title="A data structure that facilitates function lookup by C&#45;string name. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 99,-19.5 99,-.5 0,-.5"/>
+<text text-anchor="middle" x="49.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMFuncRegistry</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_221.svg b/docs/reference/api/doxygen/inherit_graph_221.svg
index 6832549bc..ceb0f841a 100644
--- a/docs/reference/api/doxygen/inherit_graph_221.svg
+++ b/docs/reference/api/doxygen/inherit_graph_221.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="91pt" height="28pt"
- viewBox="0.00 0.00 91.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="165pt" height="28pt"
+ viewBox="0.00 0.00 165.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 87,-24 87,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 161,-24 161,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMMetadata.html" target="_top" xlink:title="Top&#45;level metadata structure. Holds all other metadata types. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 83,-19.5 83,-.5 0,-.5"/>
-<text text-anchor="middle" x="41.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMMetadata</text>
+<g id="a_node1"><a xlink:href="structTVMGraphExecutorGraphAttr.html" target="_top" xlink:title="TVMGraphExecutorGraphAttr">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 157,-19.5 157,-.5 0,-.5"/>
+<text text-anchor="middle" x="78.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMGraphExecutorGraphAttr</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_222.svg b/docs/reference/api/doxygen/inherit_graph_222.svg
index 09712f904..6832549bc 100644
--- a/docs/reference/api/doxygen/inherit_graph_222.svg
+++ b/docs/reference/api/doxygen/inherit_graph_222.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="81pt" height="28pt"
- viewBox="0.00 0.00 81.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="91pt" height="28pt"
+ viewBox="0.00 0.00 91.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 77,-24 77,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 87,-24 87,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMModule.html" target="_top" xlink:title="Module container of TVM. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 73,-19.5 73,-.5 0,-.5"/>
-<text text-anchor="middle" x="36.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMModule</text>
+<g id="a_node1"><a xlink:href="structTVMMetadata.html" target="_top" xlink:title="Top&#45;level metadata structure. Holds all other metadata types. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 83,-19.5 83,-.5 0,-.5"/>
+<text text-anchor="middle" x="41.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMMetadata</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_223.svg b/docs/reference/api/doxygen/inherit_graph_223.svg
index 76df5f2a6..09712f904 100644
--- a/docs/reference/api/doxygen/inherit_graph_223.svg
+++ b/docs/reference/api/doxygen/inherit_graph_223.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="146pt" height="28pt"
- viewBox="0.00 0.00 146.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="81pt" height="28pt"
+ viewBox="0.00 0.00 81.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 142,-24 142,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 77,-24 77,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMMutableFuncRegistry.html" target="_top" xlink:title="A TVMFuncRegistry that supports adding and changing the functions. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 138,-19.5 138,-.5 0,-.5"/>
-<text text-anchor="middle" x="69" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMMutableFuncRegistry</text>
+<g id="a_node1"><a xlink:href="structTVMModule.html" target="_top" xlink:title="Module container of TVM. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 73,-19.5 73,-.5 0,-.5"/>
+<text text-anchor="middle" x="36.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMModule</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_224.svg b/docs/reference/api/doxygen/inherit_graph_224.svg
index 8f47093be..76df5f2a6 100644
--- a/docs/reference/api/doxygen/inherit_graph_224.svg
+++ b/docs/reference/api/doxygen/inherit_graph_224.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="90pt" height="28pt"
- viewBox="0.00 0.00 90.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="146pt" height="28pt"
+ viewBox="0.00 0.00 146.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 86,-24 86,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 142,-24 142,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMOpParam.html" target="_top" xlink:title="operator attributes about tvm op ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 82,-19.5 82,-.5 0,-.5"/>
-<text text-anchor="middle" x="41" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMOpParam</text>
+<g id="a_node1"><a xlink:href="structTVMMutableFuncRegistry.html" target="_top" xlink:title="A TVMFuncRegistry that supports adding and changing the functions. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 138,-19.5 138,-.5 0,-.5"/>
+<text text-anchor="middle" x="69" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMMutableFuncRegistry</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_225.svg b/docs/reference/api/doxygen/inherit_graph_225.svg
index ef3c18178..8f47093be 100644
--- a/docs/reference/api/doxygen/inherit_graph_225.svg
+++ b/docs/reference/api/doxygen/inherit_graph_225.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="102pt" height="28pt"
- viewBox="0.00 0.00 102.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="90pt" height="28pt"
+ viewBox="0.00 0.00 90.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 98,-24 98,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 86,-24 86,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMPackedFunc.html" target="_top" xlink:title="TVMPackedFunc">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 94,-19.5 94,-.5 0,-.5"/>
-<text text-anchor="middle" x="47" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMPackedFunc</text>
+<g id="a_node1"><a xlink:href="structTVMOpParam.html" target="_top" xlink:title="operator attributes about tvm op ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 82,-19.5 82,-.5 0,-.5"/>
+<text text-anchor="middle" x="41" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMOpParam</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_226.svg b/docs/reference/api/doxygen/inherit_graph_226.svg
index 0c046b247..ef3c18178 100644
--- a/docs/reference/api/doxygen/inherit_graph_226.svg
+++ b/docs/reference/api/doxygen/inherit_graph_226.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="129pt" height="28pt"
- viewBox="0.00 0.00 129.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="102pt" height="28pt"
+ viewBox="0.00 0.00 102.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 125,-24 125,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 98,-24 98,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMParallelGroupEnv.html" target="_top" xlink:title="Environment for TVM parallel task. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 121,-19.5 121,-.5 0,-.5"/>
-<text text-anchor="middle" x="60.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMParallelGroupEnv</text>
+<g id="a_node1"><a xlink:href="structTVMPackedFunc.html" target="_top" xlink:title="TVMPackedFunc">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 94,-19.5 94,-.5 0,-.5"/>
+<text text-anchor="middle" x="47" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMPackedFunc</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_227.svg b/docs/reference/api/doxygen/inherit_graph_227.svg
index aab2aa81d..0c046b247 100644
--- a/docs/reference/api/doxygen/inherit_graph_227.svg
+++ b/docs/reference/api/doxygen/inherit_graph_227.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="96pt" height="28pt"
- viewBox="0.00 0.00 96.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="129pt" height="28pt"
+ viewBox="0.00 0.00 129.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 92,-24 92,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 125,-24 125,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="structTVMTensorInfo.html" target="_top" xlink:title="Describes one tensor argument to run_model. NOTE: while TIR allows for other types of arguments...">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 88,-19.5 88,-.5 0,-.5"/>
-<text text-anchor="middle" x="44" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMTensorInfo</text>
+<g id="a_node1"><a xlink:href="structTVMParallelGroupEnv.html" target="_top" xlink:title="Environment for TVM parallel task. ">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 121,-19.5 121,-.5 0,-.5"/>
+<text text-anchor="middle" x="60.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMParallelGroupEnv</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_228.svg b/docs/reference/api/doxygen/inherit_graph_228.svg
index 1fa07e7a9..aab2aa81d 100644
--- a/docs/reference/api/doxygen/inherit_graph_228.svg
+++ b/docs/reference/api/doxygen/inherit_graph_228.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="73pt" height="28pt"
- viewBox="0.00 0.00 73.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="96pt" height="28pt"
+ viewBox="0.00 0.00 96.00 28.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 24)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 69,-24 69,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-24 92,-24 92,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<g id="a_node1"><a xlink:href="unionTVMValue.html" target="_top" xlink:title="Union type of values being passed through API and function calls. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 65,-19.5 65,-.5 0,-.5"/>
-<text text-anchor="middle" x="32.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMValue</text>
+<g id="a_node1"><a xlink:href="structTVMTensorInfo.html" target="_top" xlink:title="Describes one tensor argument to run_model. NOTE: while TIR allows for other types of arguments...">
+<polygon fill="#ffffff" stroke="#000000" points="0,-.5 0,-19.5 88,-19.5 88,-.5 0,-.5"/>
+<text text-anchor="middle" x="44" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">TVMTensorInfo</text>
 </a>
 </g>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_228.svg b/docs/reference/api/doxygen/inherit_graph_229.svg
similarity index 100%
copy from docs/reference/api/doxygen/inherit_graph_228.svg
copy to docs/reference/api/doxygen/inherit_graph_229.svg
diff --git a/docs/reference/api/doxygen/inherit_graph_39.svg b/docs/reference/api/doxygen/inherit_graph_39.svg
index 43fae6282..c935f2a14 100644
--- a/docs/reference/api/doxygen/inherit_graph_39.svg
+++ b/docs/reference/api/doxygen/inherit_graph_39.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 100)">
 <title>Graphical Class Hierarchy</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-100 407,-100 407,4 -4,4"/>
-<!-- Node1228 -->
+<!-- Node1229 -->
 <g id="node1" class="node">
-<title>Node1228</title>
+<title>Node1229</title>
 <g id="a_node1"><a xlink:href="structtvm_1_1detail_1_1ImplSEqualReduce.html" target="_top" xlink:title="tvm::detail::ImplSEqualReduce\&lt; T \&gt;">
 <polygon fill="#ffffff" stroke="#000000" points="0,-76.5 0,-95.5 194,-95.5 194,-76.5 0,-76.5"/>
 <text text-anchor="middle" x="97" y="-83.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::detail::ImplSEqualReduce&lt; T &gt;</text>
@@ -27,24 +27,24 @@
 </a>
 </g>
 </g>
-<!-- Node1228&#45;&gt;Node1 -->
+<!-- Node1229&#45;&gt;Node1 -->
 <g id="edge1" class="edge">
-<title>Node1228&#45;&gt;Node1</title>
+<title>Node1229&#45;&gt;Node1</title>
 <path fill="none" stroke="#191970" d="M162.1331,-74.7241C193.7121,-69.2571 231.2304,-62.7619 261.3715,-57.5439"/>
 <polygon fill="#191970" stroke="#191970" points="161.3622,-71.3054 152.1058,-76.46 162.5563,-78.2028 161.3622,-71.3054"/>
 </g>
-<!-- Node1225 -->
+<!-- Node1226 -->
 <g id="node3" class="node">
-<title>Node1225</title>
+<title>Node1226</title>
 <g id="a_node3"><a xlink:href="structtvm_1_1detail_1_1ImplSHashReduce.html" target="_top" xlink:title="tvm::detail::ImplSHashReduce\&lt; T \&gt;">
 <polygon fill="#ffffff" stroke="#000000" points="1,-38.5 1,-57.5 193,-57.5 193,-38.5 1,-38.5"/>
 <text text-anchor="middle" x="97" y="-45.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::detail::ImplSHashReduce&lt; T &gt;</text>
 </a>
 </g>
 </g>
-<!-- Node1225&#45;&gt;Node1 -->
+<!-- Node1226&#45;&gt;Node1 -->
 <g id="edge2" class="edge">
-<title>Node1225&#45;&gt;Node1</title>
+<title>Node1226&#45;&gt;Node1</title>
 <path fill="none" stroke="#191970" d="M203.33,-48C212.2576,-48 221.2155,-48 229.9669,-48"/>
 <polygon fill="#191970" stroke="#191970" points="203.2759,-44.5001 193.2759,-48 203.2759,-51.5001 203.2759,-44.5001"/>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_42.svg b/docs/reference/api/doxygen/inherit_graph_42.svg
index f683185f7..1760419f4 100644
--- a/docs/reference/api/doxygen/inherit_graph_42.svg
+++ b/docs/reference/api/doxygen/inherit_graph_42.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 46)">
 <title>Graphical Class Hierarchy</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-46 247,-46 247,4 -4,4"/>
-<!-- Node87 -->
+<!-- Node88 -->
 <g id="node1" class="node">
-<title>Node87</title>
+<title>Node88</title>
 <polygon fill="#ffffff" stroke="#bfbfbf" points="0,-11.5 0,-30.5 63,-30.5 63,-11.5 0,-11.5"/>
 <text text-anchor="middle" x="31.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">true_type</text>
 </g>
@@ -26,9 +26,9 @@
 </a>
 </g>
 </g>
-<!-- Node87&#45;&gt;Node0 -->
+<!-- Node88&#45;&gt;Node0 -->
 <g id="edge1" class="edge">
-<title>Node87&#45;&gt;Node0</title>
+<title>Node88&#45;&gt;Node0</title>
 <path fill="none" stroke="#191970" d="M73.3615,-21C81.421,-21 90.0752,-21 98.7807,-21"/>
 <polygon fill="#191970" stroke="#191970" points="73.1547,-17.5001 63.1547,-21 73.1547,-24.5001 73.1547,-17.5001"/>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_43.svg b/docs/reference/api/doxygen/inherit_graph_43.svg
index 82c6b16dc..f2d93477c 100644
--- a/docs/reference/api/doxygen/inherit_graph_43.svg
+++ b/docs/reference/api/doxygen/inherit_graph_43.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 35)">
 <title>Graphical Class Hierarchy</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-35 252,-35 252,4 -4,4"/>
-<!-- Node1240 -->
+<!-- Node1241 -->
 <g id="node1" class="node">
-<title>Node1240</title>
+<title>Node1241</title>
 <polygon fill="#ffffff" stroke="#bfbfbf" points="0,-6 0,-25 68,-25 68,-6 0,-6"/>
 <text text-anchor="middle" x="34" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">false_type</text>
 </g>
@@ -25,9 +25,9 @@
 </a>
 </g>
 </g>
-<!-- Node1240&#45;&gt;Node0 -->
+<!-- Node1241&#45;&gt;Node0 -->
 <g id="edge1" class="edge">
-<title>Node1240&#45;&gt;Node0</title>
+<title>Node1241&#45;&gt;Node0</title>
 <path fill="none" stroke="#191970" d="M78.0394,-15.5C86.2242,-15.5 94.9704,-15.5 103.7423,-15.5"/>
 <polygon fill="#191970" stroke="#191970" points="78.0126,-12.0001 68.0125,-15.5 78.0125,-19.0001 78.0126,-12.0001"/>
 </g>
diff --git a/docs/reference/api/doxygen/inherits.html b/docs/reference/api/doxygen/inherits.html
index 50f9d3e1b..29785d73c 100644
--- a/docs/reference/api/doxygen/inherits.html
+++ b/docs/reference/api/doxygen/inherits.html
@@ -391,137 +391,139 @@ $(function() {
 </td></tr>
 <tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_162.svg" width="236" height="126"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_163.svg" width="211" height="96"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_163.svg" width="242" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_164.svg" width="210" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_164.svg" width="211" height="96"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_165.svg" width="256" height="82"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_165.svg" width="210" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_166.svg" width="242" height="96"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_166.svg" width="256" height="82"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_167.svg" width="284" height="126"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_167.svg" width="242" height="96"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_168.svg" width="260" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_168.svg" width="284" height="126"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_169.svg" width="290" height="82"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_169.svg" width="260" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_170.svg" width="266" height="82"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_170.svg" width="290" height="82"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_171.svg" width="266" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_171.svg" width="266" height="82"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_172.svg" width="210" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_172.svg" width="266" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_173.svg" width="211" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_173.svg" width="210" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_174.svg" width="210" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_174.svg" width="211" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_175.svg" width="304" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_175.svg" width="210" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_176.svg" width="272" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_176.svg" width="304" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_177.svg" width="192" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_177.svg" width="272" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_178.svg" width="214" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_178.svg" width="192" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_179.svg" width="194" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_179.svg" width="214" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_180.svg" width="224" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_180.svg" width="194" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_181.svg" width="208" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_181.svg" width="224" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_182.svg" width="215" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_182.svg" width="208" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_183.svg" width="230" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_183.svg" width="215" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_184.svg" width="163" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_184.svg" width="230" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_185.svg" width="160" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_185.svg" width="163" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_186.svg" width="168" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_186.svg" width="160" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_187.svg" width="247" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_187.svg" width="168" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_188.svg" width="156" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_188.svg" width="247" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_189.svg" width="356" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_189.svg" width="156" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_190.svg" width="194" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_190.svg" width="356" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_191.svg" width="187" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_191.svg" width="194" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_192.svg" width="170" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_192.svg" width="187" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_193.svg" width="162" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_193.svg" width="170" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_194.svg" width="150" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_194.svg" width="162" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_195.svg" width="188" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_195.svg" width="150" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_196.svg" width="166" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_196.svg" width="188" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_197.svg" width="186" height="67"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_197.svg" width="166" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_198.svg" width="187" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_198.svg" width="186" height="67"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_199.svg" width="162" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_199.svg" width="187" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_200.svg" width="152" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_200.svg" width="162" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_201.svg" width="167" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_201.svg" width="152" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_202.svg" width="203" height="67"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_202.svg" width="167" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_203.svg" width="683" height="118"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_203.svg" width="203" height="67"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_204.svg" width="636" height="118"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_204.svg" width="683" height="118"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_205.svg" width="162" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_205.svg" width="636" height="118"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_206.svg" width="204" height="96"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_206.svg" width="162" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_207.svg" width="170" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_207.svg" width="204" height="96"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_208.svg" width="211" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_208.svg" width="170" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_209.svg" width="194" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_209.svg" width="211" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_210.svg" width="374" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_210.svg" width="194" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_211.svg" width="363" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_211.svg" width="374" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_212.svg" width="195" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_212.svg" width="363" height="52"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_213.svg" width="208" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_213.svg" width="195" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_214.svg" width="148" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_214.svg" width="208" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_215.svg" width="140" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_215.svg" width="148" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_216.svg" width="90" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_216.svg" width="140" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_217.svg" width="126" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_217.svg" width="90" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_218.svg" width="144" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_218.svg" width="126" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_219.svg" width="143" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_219.svg" width="144" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_220.svg" width="220" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_220.svg" width="143" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_221.svg" width="122" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_221.svg" width="220" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_222.svg" width="108" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_222.svg" width="122" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_223.svg" width="195" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_223.svg" width="108" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_224.svg" width="120" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_224.svg" width="195" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_225.svg" width="136" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_225.svg" width="120" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_226.svg" width="172" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_226.svg" width="136" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_227.svg" width="128" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_227.svg" width="172" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
-<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_228.svg" width="98" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_228.svg" width="128" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+</td></tr>
+<tr><td><iframe scrolling="no" frameborder="0" src="inherit_graph_229.svg" width="98" height="38"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </td></tr>
 </table>
 </div><!-- contents -->
diff --git a/docs/reference/api/doxygen/namespacemembers_f.html b/docs/reference/api/doxygen/namespacemembers_f.html
index 4807445a4..0af45576a 100644
--- a/docs/reference/api/doxygen/namespacemembers_f.html
+++ b/docs/reference/api/doxygen/namespacemembers_f.html
@@ -299,6 +299,9 @@ $(function() {
 <li>FTVMStrategy
 : <a class="el" href="namespacetvm_1_1relay.html#a2eb2ad4e7a83e0a28e2ad073d7bf9305">tvm::relay</a>
 </li>
+<li>FTVMTargetParser
+: <a class="el" href="namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3">tvm</a>
+</li>
 <li>FTVMTIRToRuntime
 : <a class="el" href="namespacetvm.html#ade89c3d682d83c2e14ec9337084541e3">tvm</a>
 </li>
diff --git a/docs/reference/api/doxygen/namespacemembers_func_p.html b/docs/reference/api/doxygen/namespacemembers_func_p.html
index 803915cb0..7c33a5248 100644
--- a/docs/reference/api/doxygen/namespacemembers_func_p.html
+++ b/docs/reference/api/doxygen/namespacemembers_func_p.html
@@ -67,12 +67,12 @@ $(function() {
 <li>PackImportsToLLVM()
 : <a class="el" href="namespacetvm_1_1codegen.html#ab2cd2a65bac4b26427a8ca0abe4e0bd6">tvm::codegen</a>
 </li>
-<li>Pad()
-: <a class="el" href="namespacetvm_1_1topi.html#a97c798d0a0ec20a95d351618b83d5121">tvm::topi</a>
-</li>
 <li>pad()
 : <a class="el" href="namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5">tvm::topi</a>
 </li>
+<li>Pad()
+: <a class="el" href="namespacetvm_1_1topi.html#a97c798d0a0ec20a95d351618b83d5121">tvm::topi</a>
+</li>
 <li>parallel_for()
 : <a class="el" href="namespacetvm_1_1support.html#a8bf1225e8bb1db575578ca2d645fb23c">tvm::support</a>
 </li>
diff --git a/docs/reference/api/doxygen/namespacemembers_func_s.html b/docs/reference/api/doxygen/namespacemembers_func_s.html
index f7e755cea..1a0b189a7 100644
--- a/docs/reference/api/doxygen/namespacemembers_func_s.html
+++ b/docs/reference/api/doxygen/namespacemembers_func_s.html
@@ -234,12 +234,12 @@ $(function() {
 <li>Specialize()
 : <a class="el" href="namespacetvm_1_1tir.html#a69b6f1b0014dc6e7dd390cff746e9782">tvm::tir</a>
 </li>
-<li>split()
-: <a class="el" href="namespacetvm_1_1topi.html#af4e59b01a5842baf6b47ad3f83731f53">tvm::topi</a>
-</li>
 <li>Split()
 : <a class="el" href="namespacetvm_1_1topi.html#a164125ca6dd5c4b677f72e63ce6b3c21">tvm::topi</a>
 </li>
+<li>split()
+: <a class="el" href="namespacetvm_1_1topi.html#af4e59b01a5842baf6b47ad3f83731f53">tvm::topi</a>
+</li>
 <li>split_sections()
 : <a class="el" href="namespacetvm_1_1topi.html#acc643e2ed166fa2ed82a95853e145619">tvm::topi</a>
 </li>
diff --git a/docs/reference/api/doxygen/namespacemembers_p.html b/docs/reference/api/doxygen/namespacemembers_p.html
index 33cb466d5..eb3b32fd5 100644
--- a/docs/reference/api/doxygen/namespacemembers_p.html
+++ b/docs/reference/api/doxygen/namespacemembers_p.html
@@ -67,12 +67,12 @@ $(function() {
 <li>PackImportsToLLVM()
 : <a class="el" href="namespacetvm_1_1codegen.html#ab2cd2a65bac4b26427a8ca0abe4e0bd6">tvm::codegen</a>
 </li>
-<li>Pad()
-: <a class="el" href="namespacetvm_1_1topi.html#a97c798d0a0ec20a95d351618b83d5121">tvm::topi</a>
-</li>
 <li>pad()
 : <a class="el" href="namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5">tvm::topi</a>
 </li>
+<li>Pad()
+: <a class="el" href="namespacetvm_1_1topi.html#a97c798d0a0ec20a95d351618b83d5121">tvm::topi</a>
+</li>
 <li>parallel_for()
 : <a class="el" href="namespacetvm_1_1support.html#a8bf1225e8bb1db575578ca2d645fb23c">tvm::support</a>
 </li>
diff --git a/docs/reference/api/doxygen/namespacemembers_s.html b/docs/reference/api/doxygen/namespacemembers_s.html
index 247c29847..8446c6993 100644
--- a/docs/reference/api/doxygen/namespacemembers_s.html
+++ b/docs/reference/api/doxygen/namespacemembers_s.html
@@ -276,12 +276,12 @@ $(function() {
 <li>Specialize()
 : <a class="el" href="namespacetvm_1_1tir.html#a69b6f1b0014dc6e7dd390cff746e9782">tvm::tir</a>
 </li>
-<li>split()
-: <a class="el" href="namespacetvm_1_1topi.html#af4e59b01a5842baf6b47ad3f83731f53">tvm::topi</a>
-</li>
 <li>Split()
 : <a class="el" href="namespacetvm_1_1topi.html#a164125ca6dd5c4b677f72e63ce6b3c21">tvm::topi</a>
 </li>
+<li>split()
+: <a class="el" href="namespacetvm_1_1topi.html#af4e59b01a5842baf6b47ad3f83731f53">tvm::topi</a>
+</li>
 <li>split_sections()
 : <a class="el" href="namespacetvm_1_1topi.html#acc643e2ed166fa2ed82a95853e145619">tvm::topi</a>
 </li>
diff --git a/docs/reference/api/doxygen/namespacemembers_t.html b/docs/reference/api/doxygen/namespacemembers_t.html
index 5ccb05b11..4efce7fb3 100644
--- a/docs/reference/api/doxygen/namespacemembers_t.html
+++ b/docs/reference/api/doxygen/namespacemembers_t.html
@@ -72,6 +72,9 @@ $(function() {
 : <a class="el" href="namespacetvm.html#a12c5457301d8a2c03a2ba1163edd7cee">tvm</a>
 , <a class="el" href="namespacetvm_1_1topi.html#aec153e599d33c78a7592007cde1c02cb">tvm::topi</a>
 </li>
+<li>TargetJSON
+: <a class="el" href="namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c">tvm</a>
+</li>
 <li>TCallEffectKind
 : <a class="el" href="namespacetvm_1_1tir.html#a651e82a046f157a7d286b0985b3edb84">tvm::tir</a>
 </li>
diff --git a/docs/reference/api/doxygen/namespacemembers_type.html b/docs/reference/api/doxygen/namespacemembers_type.html
index af50ec73f..236e614ed 100644
--- a/docs/reference/api/doxygen/namespacemembers_type.html
+++ b/docs/reference/api/doxygen/namespacemembers_type.html
@@ -198,6 +198,9 @@ $(function() {
 <li>FTVMStrategy
 : <a class="el" href="namespacetvm_1_1relay.html#a2eb2ad4e7a83e0a28e2ad073d7bf9305">tvm::relay</a>
 </li>
+<li>FTVMTargetParser
+: <a class="el" href="namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3">tvm</a>
+</li>
 <li>FTVMTIRToRuntime
 : <a class="el" href="namespacetvm.html#ade89c3d682d83c2e14ec9337084541e3">tvm</a>
 </li>
@@ -349,6 +352,9 @@ $(function() {
 
 
 <h3><a id="index_t"></a>- t -</h3><ul>
+<li>TargetJSON
+: <a class="el" href="namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c">tvm</a>
+</li>
 <li>TCallEffectKind
 : <a class="el" href="namespacetvm_1_1tir.html#a651e82a046f157a7d286b0985b3edb84">tvm::tir</a>
 </li>
diff --git a/docs/reference/api/doxygen/namespacetvm.html b/docs/reference/api/doxygen/namespacetvm.html
index 005955acb..7a7f3fda1 100644
--- a/docs/reference/api/doxygen/namespacetvm.html
+++ b/docs/reference/api/doxygen/namespacetvm.html
@@ -560,6 +560,11 @@ Typedefs</h2></td></tr>
 <tr class="separator:a41918af1a1dc386388639a9d3ad06c5d"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a7c2095aed90b2129ba631b90103313a2"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a7c2095aed90b2129ba631b90103313a2">Device</a> = DLDevice</td></tr>
 <tr class="separator:a7c2095aed90b2129ba631b90103313a2"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ad27a76489f3ede07b5d3f0dd3f97d93c"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c">TargetJSON</a> = <a class="el" href="classtvm_1_1runtime_1_1Map.html">Map</a>&lt; <a class="el" href="classtvm_1_1runtime_1_1String.html">String</a>, <a class="el" href="classtvm_1_1runtime_1_1ObjectRef.html">ObjectRef</a> &gt;</td></tr>
+<tr class="memdesc:ad27a76489f3ede07b5d3f0dd3f97d93c"><td class="mdescLeft">&#160;</td><td class="mdescRight">TargetParser to apply on instantiation of a given <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a>.  <a href="#ad27a76489f3ede07b5d3f0dd3f97d93c">More...</a><br /></td></tr>
+<tr class="separator:ad27a76489f3ede07b5d3f0dd3f97d93c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a069e7a9aa20098c3406c6fbcf29092b3"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3">FTVMTargetParser</a> = <a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html">TypedPackedFunc</a>&lt; <a class="el" href="namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c">TargetJSON</a>(<a class="el" href="namespacetvm.html#ad27a76489f3ede07b5 [...]
+<tr class="separator:a069e7a9aa20098c3406c6fbcf29092b3"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a3b1103f53a837ff14dc583e1c0b6b898"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a3b1103f53a837ff14dc583e1c0b6b898">FTVMRelayToTIR</a> = <a class="el" href="classtvm_1_1transform_1_1Pass.html">transform::Pass</a></td></tr>
 <tr class="memdesc:a3b1103f53a837ff14dc583e1c0b6b898"><td class="mdescLeft">&#160;</td><td class="mdescRight">RelayToTIR <a class="el" href="classtvm_1_1transform_1_1Pass.html">tvm::transform::Pass</a> specific to a <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a>.  <a href="#a3b1103f53a837ff14dc583e1c0b6b898">More...</a><br /></td></tr>
 <tr class="separator:a3b1103f53a837ff14dc583e1c0b6b898"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -1360,6 +1365,20 @@ Variables</h2></td></tr>
 </dl>
 <dl class="section return"><dt>Returns</dt><dd>The transformed module. </dd></dl>
 
+</div>
+</div>
+<a id="a069e7a9aa20098c3406c6fbcf29092b3"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a069e7a9aa20098c3406c6fbcf29092b3">&#9670;&nbsp;</a></span>FTVMTargetParser</h2>
+
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">using <a class="el" href="namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3">tvm::FTVMTargetParser</a> = typedef <a class="el" href="classtvm_1_1runtime_1_1TypedPackedFunc.html">TypedPackedFunc</a>&lt;<a class="el" href="namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c">TargetJSON</a>(<a class="el" href="namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c">TargetJSON</a>)&gt;</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
 </div>
 </div>
 <a id="ade89c3d682d83c2e14ec9337084541e3"></a>
@@ -1401,6 +1420,29 @@ Variables</h2></td></tr>
 <p>Abstract label for an area of memory.</p>
 <p>Currently uninterpreted and arbitrary. Likely to be replaced by a structured representation of a memory pool in the future. Please try to use this alias instead of String to aid future code migration. </p>
 
+</div>
+</div>
+<a id="ad27a76489f3ede07b5d3f0dd3f97d93c"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#ad27a76489f3ede07b5d3f0dd3f97d93c">&#9670;&nbsp;</a></span>TargetJSON</h2>
+
+<div class="memitem">
+<div class="memproto">
+      <table class="memname">
+        <tr>
+          <td class="memname">using <a class="el" href="namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c">tvm::TargetJSON</a> = typedef <a class="el" href="classtvm_1_1runtime_1_1Map.html">Map</a>&lt;<a class="el" href="classtvm_1_1runtime_1_1String.html">String</a>, <a class="el" href="classtvm_1_1runtime_1_1ObjectRef.html">ObjectRef</a>&gt;</td>
+        </tr>
+      </table>
+</div><div class="memdoc">
+
+<p>TargetParser to apply on instantiation of a given <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a>. </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">target_json</td><td><a class="el" href="classtvm_1_1Target.html" title="Managed reference class to TargetNode. ">Target</a> in JSON format to be transformed during parsing.</td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>The transformed <a class="el" href="classtvm_1_1Target.html" title="Managed reference class to TargetNode. ">Target</a> JSON object. </dd></dl>
+
 </div>
 </div>
 <a id="a72dcba4493adfcd8908663898ece3514"></a>
diff --git a/docs/reference/api/doxygen/search/all_10.js b/docs/reference/api/doxygen/search/all_10.js
index 021821479..49ea3887a 100644
--- a/docs/reference/api/doxygen/search/all_10.js
+++ b/docs/reference/api/doxygen/search/all_10.js
@@ -2,7 +2,7 @@ var searchData=
 [
   ['objallocatorbase',['ObjAllocatorBase',['../classtvm_1_1runtime_1_1ObjAllocatorBase.html',1,'tvm::runtime::ObjAllocatorBase&lt; Derived &gt;'],['../classtvm_1_1runtime_1_1Object.html#a8fae619f3bd1a2b2f7273d8d6525032a',1,'tvm::runtime::Object::ObjAllocatorBase()'],['../classtvm_1_1runtime_1_1ObjectPtr.html#a8fae619f3bd1a2b2f7273d8d6525032a',1,'tvm::runtime::ObjectPtr::ObjAllocatorBase()']]],
   ['objallocatorbase_3c_20simpleobjallocator_20_3e',['ObjAllocatorBase&lt; SimpleObjAllocator &gt;',['../classtvm_1_1runtime_1_1ObjAllocatorBase.html',1,'tvm::runtime']]],
-  ['object',['Object',['../classtvm_1_1runtime_1_1Object.html',1,'tvm::runtime::Object'],['../classtvm_1_1runtime_1_1ObjectPtr.html#a0720b5f434e636e22a3ed34f847eec57',1,'tvm::runtime::ObjectPtr::Object()'],['../classtvm_1_1runtime_1_1Object.html#a133436a9ec5c4a768b94102bf95a660b',1,'tvm::runtime::Object::Object()'],['../classtvm_1_1runtime_1_1Object.html#ab7968feb6ad38ecaffc320e13819d826',1,'tvm::runtime::Object::Object(const Object &amp;other)'],['../classtvm_1_1runtime_1_1Object.html#a [...]
+  ['object',['Object',['../classtvm_1_1runtime_1_1Object.html',1,'tvm::runtime::Object'],['../structtvm_1_1runtime_1_1vm_1_1Instruction.html#a0ac4dada6519d071a819cc897cb15cf1',1,'tvm::runtime::vm::Instruction::object()'],['../classtvm_1_1runtime_1_1ObjectPtr.html#a0720b5f434e636e22a3ed34f847eec57',1,'tvm::runtime::ObjectPtr::Object()'],['../classtvm_1_1runtime_1_1Object.html#a133436a9ec5c4a768b94102bf95a660b',1,'tvm::runtime::Object::Object()'],['../classtvm_1_1runtime_1_1Object.html#ab7 [...]
   ['object_2eh',['object.h',['../object_8h.html',1,'']]],
   ['object_5fpath_2eh',['object_path.h',['../object__path_8h.html',1,'']]],
   ['objectequal',['ObjectEqual',['../structtvm_1_1runtime_1_1ObjectEqual.html',1,'tvm::runtime']]],
diff --git a/docs/reference/api/doxygen/search/all_11.js b/docs/reference/api/doxygen/search/all_11.js
index 8c5da8ad1..0c5222f86 100644
--- a/docs/reference/api/doxygen/search/all_11.js
+++ b/docs/reference/api/doxygen/search/all_11.js
@@ -21,7 +21,7 @@ var searchData=
   ['packetfieldsizebytes',['PacketFieldSizeBytes',['../classtvm_1_1runtime_1_1micro__rpc_1_1PacketFieldSizeBytes.html',1,'tvm::runtime::micro_rpc']]],
   ['packimportstoc',['PackImportsToC',['../namespacetvm_1_1codegen.html#abf02059ebadcdb8bbbe5c840b646d67b',1,'tvm::codegen']]],
   ['packimportstollvm',['PackImportsToLLVM',['../namespacetvm_1_1codegen.html#ab2cd2a65bac4b26427a8ca0abe4e0bd6',1,'tvm::codegen']]],
-  ['pad',['pad',['../namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5',1,'tvm::topi::pad(const tvm::te::Tensor &amp;t, const tvm::Array&lt; tvm::PrimExpr &gt; &amp;pad_before, tvm::Array&lt; tvm::PrimExpr &gt; pad_after=tvm::Array&lt; tvm::PrimExpr &gt;(), PrimExpr pad_value=PrimExpr(), std::string name=&quot;T_pad&quot;, std::string tag=kElementWise, std::string pad_mode=&quot;constant&quot;, const Array&lt; PrimExpr &gt; *dyn_output_shape=nullptr)'],['../namespacetvm_1_1topi [...]
+  ['pad',['Pad',['../namespacetvm_1_1topi.html#a97c798d0a0ec20a95d351618b83d5121',1,'tvm::topi::Pad(const Array&lt; PrimExpr &gt; shape, int odim)'],['../namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5',1,'tvm::topi::pad(const tvm::te::Tensor &amp;t, const tvm::Array&lt; tvm::PrimExpr &gt; &amp;pad_before, tvm::Array&lt; tvm::PrimExpr &gt; pad_after=tvm::Array&lt; tvm::PrimExpr &gt;(), PrimExpr pad_value=PrimExpr(), std::string name=&quot;T_pad&quot;, std::string tag=kElement [...]
   ['pad_5fmode',['pad_mode',['../structtvm_1_1relay_1_1PadAttrs.html#a5b524c3add781cd2da894e81553079f8',1,'tvm::relay::PadAttrs']]],
   ['pad_5futils_2eh',['pad_utils.h',['../pad__utils_8h.html',1,'']]],
   ['pad_5fvalue',['pad_value',['../structtvm_1_1relay_1_1SpaceToBatchNDAttrs.html#a7c0fbd47621c925a45e1074f85a6b70f',1,'tvm::relay::SpaceToBatchNDAttrs']]],
@@ -34,7 +34,7 @@ var searchData=
   ['page_5fallocator_2eh',['page_allocator.h',['../page__allocator_8h.html',1,'']]],
   ['pagememorymanagercreate',['PageMemoryManagerCreate',['../page__allocator_8h.html#a720dbc7474ac13b93fafb974cfc20bc7',1,'page_allocator.h']]],
   ['papi_2eh',['papi.h',['../papi_8h.html',1,'']]],
-  ['parallel',['Parallel',['../classtvm_1_1tir_1_1ScheduleNode.html#a553dc17c0b49b175cd16881c81b6c789',1,'tvm::tir::ScheduleNode::Parallel()'],['../classtvm_1_1auto__scheduler_1_1State.html#a2376f0180bc5b5dd4b456f2a75d4a366',1,'tvm::auto_scheduler::State::parallel()'],['../classtvm_1_1te_1_1Stage.html#a60a6be10a1a96cb594c1399efabafef3',1,'tvm::te::Stage::parallel()']]],
+  ['parallel',['parallel',['../classtvm_1_1auto__scheduler_1_1State.html#a2376f0180bc5b5dd4b456f2a75d4a366',1,'tvm::auto_scheduler::State::parallel()'],['../classtvm_1_1te_1_1Stage.html#a60a6be10a1a96cb594c1399efabafef3',1,'tvm::te::Stage::parallel()'],['../classtvm_1_1tir_1_1ScheduleNode.html#a553dc17c0b49b175cd16881c81b6c789',1,'tvm::tir::ScheduleNode::Parallel()']]],
   ['parallel_5ffor',['parallel_for',['../namespacetvm_1_1support.html#a8bf1225e8bb1db575578ca2d645fb23c',1,'tvm::support']]],
   ['parallel_5ffor_2eh',['parallel_for.h',['../parallel__for_8h.html',1,'']]],
   ['parallel_5ffor_5fdynamic',['parallel_for_dynamic',['../namespacetvm_1_1support.html#afe4271363c794f1644ce7af5c2266530',1,'tvm::support']]],
diff --git a/docs/reference/api/doxygen/search/all_13.js b/docs/reference/api/doxygen/search/all_13.js
index 00d68067c..86e932675 100644
--- a/docs/reference/api/doxygen/search/all_13.js
+++ b/docs/reference/api/doxygen/search/all_13.js
@@ -81,7 +81,7 @@ var searchData=
   ['registerconfigoption',['RegisterConfigOption',['../classtvm_1_1transform_1_1PassContext.html#a6f1d1040cc97320414b4690203f87919',1,'tvm::transform::PassContext']]],
   ['registergenericfunc',['RegisterGenericFunc',['../classtvm_1_1GenericFunc.html#a909acecbf2f34f847a34e587a4570dce',1,'tvm::GenericFunc']]],
   ['registerorget',['RegisterOrGet',['../classtvm_1_1OpRegEntry.html#a39a4d3e7f905eb4e29ca464bcedb05bd',1,'tvm::OpRegEntry::RegisterOrGet()'],['../classtvm_1_1relay_1_1ExecutorRegEntry.html#a03347a2b68269b853a7c0399994951ef',1,'tvm::relay::ExecutorRegEntry::RegisterOrGet()'],['../classtvm_1_1relay_1_1RuntimeRegEntry.html#ae8b479159ccd8b35b75950fcda58dd9d',1,'tvm::relay::RuntimeRegEntry::RegisterOrGet()'],['../classtvm_1_1TargetTagRegEntry.html#a07e0631600484dc0985ca62b1620461c',1,'tvm::T [...]
-  ['registry',['Registry',['../classtvm_1_1runtime_1_1Registry.html',1,'tvm::runtime::Registry'],['../classtvm_1_1ReflectionVTable_1_1Registry.html',1,'tvm::ReflectionVTable::Registry'],['../classtvm_1_1ReflectionVTable_1_1Registry.html#ac8f4637640aa9dffed745303a4cfa827',1,'tvm::ReflectionVTable::Registry::Registry()'],['../structTVMMutableFuncRegistry.html#acc1fcd6554c627c1bf3b3c00e1120e9b',1,'TVMMutableFuncRegistry::registry()'],['../structTVMModule.html#a6db21005b9e983207b341e65af4c4a [...]
+  ['registry',['Registry',['../classtvm_1_1runtime_1_1Registry.html',1,'tvm::runtime::Registry'],['../classtvm_1_1ReflectionVTable_1_1Registry.html',1,'tvm::ReflectionVTable::Registry'],['../structTVMMutableFuncRegistry.html#acc1fcd6554c627c1bf3b3c00e1120e9b',1,'TVMMutableFuncRegistry::registry()'],['../structTVMModule.html#a6db21005b9e983207b341e65af4c4ab7',1,'TVMModule::registry()'],['../classtvm_1_1ReflectionVTable_1_1Registry.html#ac8f4637640aa9dffed745303a4cfa827',1,'tvm::Reflection [...]
   ['registry_2eh',['registry.h',['../registry_8h.html',1,'']]],
   ['regname',['RegName',['../namespacetvm_1_1runtime_1_1vm.html#a3bbbf700719e9dc3dda2bc25210c18ae',1,'tvm::runtime::vm']]],
   ['reindex',['ReIndex',['../classtvm_1_1tir_1_1ScheduleNode.html#a9e36a8a0e37a76e55068dd534e28c8c5',1,'tvm::tir::ScheduleNode']]],
@@ -115,7 +115,7 @@ var searchData=
   ['rendererrors',['RenderErrors',['../classtvm_1_1ErrorReporter.html#a54699ec5f538bd207b5aa4e3f55181c6',1,'tvm::ErrorReporter']]],
   ['renewdefs',['RenewDefs',['../namespacetvm_1_1tir.html#a2e639c81d1c6875ead7764ab8a7cd553',1,'tvm::tir']]],
   ['renormalizesplitpattern',['RenormalizeSplitPattern',['../namespacetvm_1_1tir_1_1transform.html#a5c670c9efcd740f2f168b62e624c8c57',1,'tvm::tir::transform']]],
-  ['reorder',['reorder',['../classtvm_1_1auto__scheduler_1_1State.html#a16e95966b46977eff629a5f4f1564533',1,'tvm::auto_scheduler::State::reorder()'],['../classtvm_1_1te_1_1Stage.html#ad96cd240a92df9cafae89cdf2a7e302e',1,'tvm::te::Stage::reorder()'],['../classtvm_1_1tir_1_1ScheduleNode.html#a059229fe0e254961da406807a97f7a3d',1,'tvm::tir::ScheduleNode::Reorder()']]],
+  ['reorder',['Reorder',['../classtvm_1_1tir_1_1ScheduleNode.html#a059229fe0e254961da406807a97f7a3d',1,'tvm::tir::ScheduleNode::Reorder()'],['../classtvm_1_1auto__scheduler_1_1State.html#a16e95966b46977eff629a5f4f1564533',1,'tvm::auto_scheduler::State::reorder()'],['../classtvm_1_1te_1_1Stage.html#ad96cd240a92df9cafae89cdf2a7e302e',1,'tvm::te::Stage::reorder()']]],
   ['reorderstep',['ReorderStep',['../classtvm_1_1auto__scheduler_1_1ReorderStep.html',1,'tvm::auto_scheduler::ReorderStep'],['../classtvm_1_1auto__scheduler_1_1ReorderStep.html#a83b9dab5f38d5a4d42c6424ba437bc10',1,'tvm::auto_scheduler::ReorderStep::ReorderStep(int stage_id, const Array&lt; Integer &gt; &amp;after_ids)'],['../classtvm_1_1auto__scheduler_1_1ReorderStep.html#a9586534afef3e0f57ab31e8374e70792',1,'tvm::auto_scheduler::ReorderStep::ReorderStep(dmlc::JSONReader *reader)']]],
   ['reorderstepnode',['ReorderStepNode',['../classtvm_1_1auto__scheduler_1_1ReorderStepNode.html',1,'tvm::auto_scheduler']]],
   ['reorg',['reorg',['../namespacetvm_1_1topi_1_1vision.html#a1014df582489005202c4218e51792314',1,'tvm::topi::vision']]],
@@ -139,7 +139,7 @@ var searchData=
   ['required',['required',['../classtvm_1_1transform_1_1PassInfoNode.html#abc4fdfbc0e6db41ae6296d7b2816b534',1,'tvm::transform::PassInfoNode']]],
   ['required_5fpass',['required_pass',['../classtvm_1_1transform_1_1PassContextNode.html#a029074685b6cfcc0431098697f2bc927',1,'tvm::transform::PassContextNode']]],
   ['reserve',['reserve',['../classtvm_1_1runtime_1_1Array.html#a1a7727b86efaf35c58a5198ab1c139c8',1,'tvm::runtime::Array']]],
-  ['reset',['reset',['../classtvm_1_1runtime_1_1NDArray.html#af2a8ccab95d432d1ecad7a389e11bcd3',1,'tvm::runtime::NDArray::reset()'],['../classtvm_1_1runtime_1_1ObjectPtr.html#ac4461465ba0e785794794e0405c96590',1,'tvm::runtime::ObjectPtr::reset()'],['../classtvm_1_1auto__scheduler_1_1ProgramMeasurerNode.html#a73b14ea360a9902c291d5bf6e97636cd',1,'tvm::auto_scheduler::ProgramMeasurerNode::Reset()'],['../classtvm_1_1runtime_1_1micro__rpc_1_1Unframer.html#ae6279154fe70e9eb85937b51e70a4bf8',1, [...]
+  ['reset',['Reset',['../classtvm_1_1auto__scheduler_1_1ProgramMeasurerNode.html#a73b14ea360a9902c291d5bf6e97636cd',1,'tvm::auto_scheduler::ProgramMeasurerNode::Reset()'],['../classtvm_1_1runtime_1_1micro__rpc_1_1Unframer.html#ae6279154fe70e9eb85937b51e70a4bf8',1,'tvm::runtime::micro_rpc::Unframer::Reset()'],['../classtvm_1_1runtime_1_1micro__rpc_1_1Framer.html#a44ff9650ecca8785e33c25c369d2570a',1,'tvm::runtime::micro_rpc::Framer::Reset()'],['../classtvm_1_1tir_1_1StmtSRefNode.html#a0a81 [...]
   ['reset_5fattr',['reset_attr',['../classtvm_1_1OpRegEntry.html#a67628f8d3d6dea5b0a47e462c06b7790',1,'tvm::OpRegEntry']]],
   ['resetthreadpool',['ResetThreadPool',['../namespacetvm_1_1runtime_1_1threading.html#aafdb21c00248ff146b614a7e888b4fd7',1,'tvm::runtime::threading']]],
   ['reshape',['reshape',['../namespacetvm_1_1topi.html#a3aad65f2505802109ba7d05359ce9005',1,'tvm::topi']]],
diff --git a/docs/reference/api/doxygen/search/all_14.js b/docs/reference/api/doxygen/search/all_14.js
index 6b627b72a..f343f58e4 100644
--- a/docs/reference/api/doxygen/search/all_14.js
+++ b/docs/reference/api/doxygen/search/all_14.js
@@ -50,7 +50,7 @@ var searchData=
   ['schedule_5fsoftmax',['schedule_softmax',['../namespacetvm_1_1topi_1_1cuda.html#a4a2a33ae8186abc6af4dae2ffd12ff91',1,'tvm::topi::cuda::schedule_softmax()'],['../namespacetvm_1_1topi_1_1rocm.html#ab71ce2b3685f0ce5f30d2d661c5e799b',1,'tvm::topi::rocm::schedule_softmax()']]],
   ['scheduledebugmask',['ScheduleDebugMask',['../namespacetvm_1_1tir.html#a230fa4eb6152910f125f636dab3bd4e0',1,'tvm::tir']]],
   ['scheduleerrorrenderlevel',['ScheduleErrorRenderLevel',['../namespacetvm_1_1tir.html#a9ae244600a5e56c4adc9faf6d88f931e',1,'tvm::tir']]],
-  ['schedulenode',['ScheduleNode',['../classtvm_1_1te_1_1ScheduleNode.html',1,'tvm::te::ScheduleNode'],['../classtvm_1_1tir_1_1ScheduleNode.html',1,'tvm::tir::ScheduleNode']]],
+  ['schedulenode',['ScheduleNode',['../classtvm_1_1tir_1_1ScheduleNode.html',1,'tvm::tir::ScheduleNode'],['../classtvm_1_1te_1_1ScheduleNode.html',1,'tvm::te::ScheduleNode']]],
   ['scheduleops',['ScheduleOps',['../namespacetvm_1_1te.html#ac5f0fdd7c2d3deb15b7855c5b1ff1aff',1,'tvm::te']]],
   ['schedulepostproctoprimfunc',['SchedulePostProcToPrimFunc',['../namespacetvm_1_1te.html#a9eb39ad3aa5af348b1b27e755074f525',1,'tvm::te']]],
   ['schedulereduce',['ScheduleReduce',['../namespacetvm_1_1topi_1_1cuda.html#a3dbbf8bdb78533c15e62ab0e874eb360',1,'tvm::topi::cuda']]],
@@ -139,6 +139,7 @@ var searchData=
   ['set_5fscope',['set_scope',['../classtvm_1_1te_1_1Stage.html#a0dc42f190125f0cf63e8d749ae66af7c',1,'tvm::te::Stage']]],
   ['set_5fstore_5fpredicate',['set_store_predicate',['../classtvm_1_1te_1_1Stage.html#a0056636228aed23f71eecc0810731436',1,'tvm::te::Stage']]],
   ['set_5fsupport_5flevel',['set_support_level',['../classtvm_1_1OpRegEntry.html#ab4f7e0f99c8acf2153e15f7cbb6c3c97',1,'tvm::OpRegEntry']]],
+  ['set_5ftarget_5fparser',['set_target_parser',['../classtvm_1_1TargetKindRegEntry.html#a21152c83f61180dcb6293226a98025a8',1,'tvm::TargetKindRegEntry']]],
   ['set_5fupper_5fbound',['set_upper_bound',['../structtvm_1_1detail_1_1AttrNopEntry.html#add2843b725ee43be26672a8d2d641cce',1,'tvm::detail::AttrNopEntry::set_upper_bound()'],['../structtvm_1_1detail_1_1AttrInitEntry.html#a69e876dfc10eed9573c3043ea5ef2013',1,'tvm::detail::AttrInitEntry::set_upper_bound()'],['../classtvm_1_1detail_1_1AttrDocEntry.html#aec039b071d826ab164c5abe123aefaa3',1,'tvm::detail::AttrDocEntry::set_upper_bound()'],['../structtvm_1_1detail_1_1AttrTriggerNonDefaultEntry [...]
   ['setargs',['SetArgs',['../structTVMPackedFunc.html#a455396ff85d8e8cff95e39ae15ad5cc6',1,'TVMPackedFunc']]],
   ['setaxisseparator',['SetAxisSeparator',['../classtvm_1_1tir_1_1ScheduleNode.html#a025b5eef0c2516fc1f72eed9ced88807',1,'tvm::tir::ScheduleNode']]],
@@ -165,7 +166,7 @@ var searchData=
   ['setvalue_3c_20uint64_5ft_20_3e',['SetValue&lt; uint64_t &gt;',['../namespacetvm_1_1detail.html#acb3382242cbf538f64edae13e4ec5a84',1,'tvm::detail']]],
   ['shallowcopy',['ShallowCopy',['../classtvm_1_1IRModuleNode.html#a86bbdc4b857ce5958a2b5f29e1d6fcb6',1,'tvm::IRModuleNode']]],
   ['shallowcopyirmodule',['ShallowCopyIRModule',['../classtvm_1_1IRModule.html#aea8b821cf92cf525bd87bf15f5d31889',1,'tvm::IRModule']]],
-  ['shape',['shape',['../classtvm_1_1TensorTypeNode.html#a98fa347833e4504dd6f8056d9863a708',1,'tvm::TensorTypeNode::shape()'],['../classtvm_1_1meta__schedule_1_1TensorInfoNode.html#ac16d3b10f7c68eefb27e55e865bb304c',1,'tvm::meta_schedule::TensorInfoNode::shape()'],['../structtvm_1_1relay_1_1InitOpAttrs.html#aaaec76cc5ea9a543c4ea174a6b38bf5e',1,'tvm::relay::InitOpAttrs::shape()'],['../classtvm_1_1relay_1_1ShapePatternNode.html#a749813cbbd38f8021a7df897d527d6e0',1,'tvm::relay::ShapePattern [...]
+  ['shape',['shape',['../classtvm_1_1TensorTypeNode.html#a98fa347833e4504dd6f8056d9863a708',1,'tvm::TensorTypeNode::shape()'],['../classtvm_1_1meta__schedule_1_1TensorInfoNode.html#ac16d3b10f7c68eefb27e55e865bb304c',1,'tvm::meta_schedule::TensorInfoNode::shape()'],['../structtvm_1_1relay_1_1InitOpAttrs.html#aaaec76cc5ea9a543c4ea174a6b38bf5e',1,'tvm::relay::InitOpAttrs::shape()'],['../classtvm_1_1relay_1_1ShapePatternNode.html#a749813cbbd38f8021a7df897d527d6e0',1,'tvm::relay::ShapePattern [...]
   ['shape_5f',['shape_',['../classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html#aa5597a1760c9f8c9d1fd51584b1283fb',1,'tvm::runtime::NDArray::ContainerBase']]],
   ['shape_5fbackward_5frule',['shape_backward_rule',['../classtvm_1_1tir_1_1BijectiveLayoutNode.html#a0befdd0a2371c0d12970e8ac6623b59b',1,'tvm::tir::BijectiveLayoutNode']]],
   ['shape_5fcount',['shape_count',['../structTVMGraphExecutorGraphAttr.html#a182b228582f1186f2a15de50a25b3375',1,'TVMGraphExecutorGraphAttr']]],
@@ -213,7 +214,7 @@ var searchData=
   ['singleton',['Singleton',['../classtvm_1_1te_1_1Singleton.html',1,'tvm::te::Singleton'],['../classtvm_1_1te_1_1Singleton.html#a94450b853dcd5e9865546d8c8fe351a1',1,'tvm::te::Singleton::Singleton()']]],
   ['singletonnode',['SingletonNode',['../classtvm_1_1te_1_1SingletonNode.html',1,'tvm::te']]],
   ['sinh',['sinh',['../namespacetvm.html#ad828bc801c73df761c58d9f8877d52ee',1,'tvm::sinh()'],['../namespacetvm_1_1topi.html#af9694f5470ba2cabc19866be3b00fe8d',1,'tvm::topi::sinh()']]],
-  ['size',['size',['../structtvm_1_1relay_1_1Resize1DAttrs.html#afb1175c0ff019e485ed65d98305b5f62',1,'tvm::relay::Resize1DAttrs::size()'],['../structtvm_1_1relay_1_1Resize2DAttrs.html#ab3e26dbbc2dc1da40764832a99459c30',1,'tvm::relay::Resize2DAttrs::size()'],['../structtvm_1_1relay_1_1Resize3DAttrs.html#aab61649fe8417a8a7fbc849090bac083',1,'tvm::relay::Resize3DAttrs::size()'],['../structtvm_1_1relay_1_1LRNAttrs.html#a3758ed1f8a8bcf73008ae1dd2bfa148e',1,'tvm::relay::LRNAttrs::size()'],['.. [...]
+  ['size',['Size',['../classtvm_1_1TensorTypeNode.html#a1f08dac86ae8aea81d058ef64cfd38b4',1,'tvm::TensorTypeNode::Size()'],['../classtvm_1_1meta__schedule_1_1DatabaseNode.html#aae5b9ab9f7e497654b90c23a2159a5cc',1,'tvm::meta_schedule::DatabaseNode::Size()'],['../classtvm_1_1meta__schedule_1_1PyDatabaseNode.html#a36817d04978253571fef7d01427ce9c0',1,'tvm::meta_schedule::PyDatabaseNode::Size()'],['../classtvm_1_1runtime_1_1micro__rpc_1_1FrameBuffer.html#ae395a0f1c6e79e825aa7a244c74a5d7b',1,' [...]
   ['size_5f',['size_',['../classtvm_1_1runtime_1_1MapNode.html#a2285f106f6afa29f512a7818ad59e9e5',1,'tvm::runtime::MapNode']]],
   ['size_5fbytes',['size_bytes',['../structtvm_1_1tir_1_1usmp_1_1BufferInfoNode.html#a0a5d4bd6072c268df05b90d267b4c0a0',1,'tvm::tir::usmp::BufferInfoNode']]],
   ['size_5fhint_5fbytes',['size_hint_bytes',['../structtvm_1_1PoolInfoNode.html#ac073aeb75bf031ff8687e132bc112f92',1,'tvm::PoolInfoNode::size_hint_bytes()'],['../structtvm_1_1PoolInfoPropertiesNode.html#aed7c5573ffc8db9424e77e3a85cad120',1,'tvm::PoolInfoPropertiesNode::size_hint_bytes()']]],
@@ -270,7 +271,7 @@ var searchData=
   ['specialize',['Specialize',['../namespacetvm_1_1tir.html#a69b6f1b0014dc6e7dd390cff746e9782',1,'tvm::tir']]],
   ['specializedcondition',['SpecializedCondition',['../classtvm_1_1te_1_1SpecializedCondition.html',1,'tvm::te::SpecializedCondition'],['../classtvm_1_1te_1_1SpecializedCondition.html#a48d119ee1c6033929a5592cfc2592e60',1,'tvm::te::SpecializedCondition::SpecializedCondition()']]],
   ['specializedconditionnode',['SpecializedConditionNode',['../classtvm_1_1te_1_1SpecializedConditionNode.html',1,'tvm::te']]],
-  ['split',['Split',['../classtvm_1_1te_1_1Split.html',1,'tvm::te::Split'],['../classtvm_1_1auto__scheduler_1_1State.html#a5815f21fc90ba7cc379c2410c05ab54c',1,'tvm::auto_scheduler::State::split()'],['../classtvm_1_1te_1_1Stage.html#a5a7cd562be59b68a187ad97085a3425d',1,'tvm::te::Stage::split()'],['../classtvm_1_1te_1_1Split.html#a328e0c093ce5b41ebaf33e0e80592764',1,'tvm::te::Split::Split()'],['../classtvm_1_1tir_1_1Layout.html#ad7657af7789fe040d3224c0149976bb4',1,'tvm::tir::Layout::Split( [...]
+  ['split',['Split',['../classtvm_1_1te_1_1Split.html',1,'tvm::te::Split'],['../classtvm_1_1te_1_1Split.html#a328e0c093ce5b41ebaf33e0e80592764',1,'tvm::te::Split::Split()'],['../classtvm_1_1tir_1_1Layout.html#ad7657af7789fe040d3224c0149976bb4',1,'tvm::tir::Layout::Split()'],['../classtvm_1_1tir_1_1ScheduleNode.html#ac190a0ab76d8754a35209479bcc6dfa2',1,'tvm::tir::ScheduleNode::Split()'],['../classtvm_1_1auto__scheduler_1_1State.html#a5815f21fc90ba7cc379c2410c05ab54c',1,'tvm::auto_schedule [...]
   ['split_5fby_5fnparts',['split_by_nparts',['../classtvm_1_1te_1_1Stage.html#a51432f38d9ec4792a2525023179ae604',1,'tvm::te::Stage']]],
   ['split_5fsections',['split_sections',['../namespacetvm_1_1topi.html#acc643e2ed166fa2ed82a95853e145619',1,'tvm::topi']]],
   ['splitargs',['SplitArgs',['../namespacetvm_1_1relay_1_1transform.html#a2425d757b896168a109498e8d34ba960',1,'tvm::relay::transform']]],
@@ -300,7 +301,7 @@ var searchData=
   ['stackmemorymanager_5ffree',['StackMemoryManager_Free',['../stack__allocator_8h.html#a1e5d35061f8f72d784a5792086f611db',1,'stack_allocator.h']]],
   ['stackmemorymanager_5ffree_5fbody',['StackMemoryManager_Free_Body',['../stack__allocator_8h.html#ac30e6df6e876d5d65a7e86e4dd0e0e95',1,'stack_allocator.h']]],
   ['stackmemorymanager_5finit',['StackMemoryManager_Init',['../stack__allocator_8h.html#a3fd459f5167dacf85850c929d1b38882',1,'stack_allocator.h']]],
-  ['stage',['Stage',['../classtvm_1_1auto__scheduler_1_1Stage.html',1,'tvm::auto_scheduler::Stage'],['../classtvm_1_1te_1_1Stage.html',1,'tvm::te::Stage'],['../classtvm_1_1auto__scheduler_1_1Stage.html#a39ffbb1b4e189180bc4067e74965f42b',1,'tvm::auto_scheduler::Stage::Stage(te::Operation op)'],['../classtvm_1_1auto__scheduler_1_1Stage.html#af0643fe8c1298451c9a322f915c48843',1,'tvm::auto_scheduler::Stage::Stage(te::Operation op, StageKind op_type, const Array&lt; Iterator &gt; &amp;iters,  [...]
+  ['stage',['Stage',['../classtvm_1_1te_1_1Stage.html',1,'tvm::te::Stage'],['../classtvm_1_1auto__scheduler_1_1Stage.html',1,'tvm::auto_scheduler::Stage'],['../classtvm_1_1auto__scheduler_1_1Stage.html#a39ffbb1b4e189180bc4067e74965f42b',1,'tvm::auto_scheduler::Stage::Stage(te::Operation op)'],['../classtvm_1_1auto__scheduler_1_1Stage.html#af0643fe8c1298451c9a322f915c48843',1,'tvm::auto_scheduler::Stage::Stage(te::Operation op, StageKind op_type, const Array&lt; Iterator &gt; &amp;iters,  [...]
   ['stage_5fid',['stage_id',['../classtvm_1_1auto__scheduler_1_1StepNode.html#afcc7aaf263348f66139307affbfcee09',1,'tvm::auto_scheduler::StepNode']]],
   ['stage_5fmap',['stage_map',['../classtvm_1_1te_1_1ScheduleNode.html#a612223aec2751cbd035a18c9e5453085',1,'tvm::te::ScheduleNode']]],
   ['stage_5fpipeline',['stage_pipeline',['../classtvm_1_1tir_1_1BlockScopeNode.html#ace2ff75b4be37feeccbc5e12b1c8d30e',1,'tvm::tir::BlockScopeNode']]],
@@ -308,7 +309,7 @@ var searchData=
   ['stageattributes',['StageAttributes',['../structtvm_1_1auto__scheduler_1_1StageAttributes.html',1,'tvm::auto_scheduler']]],
   ['stagekey',['StageKey',['../namespacetvm_1_1auto__scheduler.html#afc2e977e1bb9b11fc3b78758d997eb60',1,'tvm::auto_scheduler']]],
   ['stagekind',['StageKind',['../namespacetvm_1_1auto__scheduler.html#af6533a065c0157391331e89a0e95f35a',1,'tvm::auto_scheduler']]],
-  ['stagenode',['StageNode',['../classtvm_1_1auto__scheduler_1_1StageNode.html',1,'tvm::auto_scheduler::StageNode'],['../classtvm_1_1te_1_1StageNode.html',1,'tvm::te::StageNode']]],
+  ['stagenode',['StageNode',['../classtvm_1_1te_1_1StageNode.html',1,'tvm::te::StageNode'],['../classtvm_1_1auto__scheduler_1_1StageNode.html',1,'tvm::auto_scheduler::StageNode']]],
   ['stages',['stages',['../classtvm_1_1auto__scheduler_1_1StateNode.html#a881e14990bf228ee3fddb3721c451b9e',1,'tvm::auto_scheduler::StateNode::stages()'],['../classtvm_1_1te_1_1ScheduleNode.html#ab5649969db603d6b7b4d155c0d09cdd5',1,'tvm::te::ScheduleNode::stages()']]],
   ['stagetoaxesmap',['StageToAxesMap',['../namespacetvm_1_1auto__scheduler.html#a8f12e558fc4b8fbb990e7e204c06beeb',1,'tvm::auto_scheduler']]],
   ['start',['start',['../structtvm_1_1relay_1_1ArangeAttrs.html#ae8ae5bc1551b406a4f52395af343c2ce',1,'tvm::relay::ArangeAttrs::start()'],['../classtvm_1_1runtime_1_1TimerNode.html#aa11fc338c39ee2137448e54a10efe0ae',1,'tvm::runtime::TimerNode::Start()'],['../classtvm_1_1runtime_1_1Timer.html#a89bcaa433499bc68902cb473d5eba6ca',1,'tvm::runtime::Timer::Start()'],['../classtvm_1_1runtime_1_1profiling_1_1MetricCollectorNode.html#a44fadfb7b0f961a7fb2275e3b5dbcd88',1,'tvm::runtime::profiling::Me [...]
@@ -348,9 +349,9 @@ var searchData=
   ['stmtsref',['StmtSRef',['../classtvm_1_1tir_1_1StmtSRef.html',1,'tvm::tir::StmtSRef'],['../classtvm_1_1tir_1_1StmtSRef.html#a31687ace5dc4fe487ffb87d658d86412',1,'tvm::tir::StmtSRef::StmtSRef()']]],
   ['stmtsrefnode',['StmtSRefNode',['../classtvm_1_1tir_1_1StmtSRefNode.html',1,'tvm::tir']]],
   ['stmtvisitor',['StmtVisitor',['../classtvm_1_1tir_1_1StmtVisitor.html',1,'tvm::tir']]],
-  ['stop',['stop',['../structtvm_1_1relay_1_1ArangeAttrs.html#a1eadf1f3964ca83dade8edeae7d6d7cf',1,'tvm::relay::ArangeAttrs::stop()'],['../classtvm_1_1runtime_1_1TimerNode.html#a67eb764f2c9e3fb7c2708f01c0c35683',1,'tvm::runtime::TimerNode::Stop()'],['../classtvm_1_1runtime_1_1profiling_1_1MetricCollectorNode.html#aca9679dd49dfbc886b9dc99539cbf0e6',1,'tvm::runtime::profiling::MetricCollectorNode::Stop()'],['../classtvm_1_1runtime_1_1profiling_1_1Profiler.html#aa2000d8cd1970b5d29139ab18313 [...]
+  ['stop',['Stop',['../classtvm_1_1runtime_1_1TimerNode.html#a67eb764f2c9e3fb7c2708f01c0c35683',1,'tvm::runtime::TimerNode::Stop()'],['../classtvm_1_1runtime_1_1profiling_1_1MetricCollectorNode.html#aca9679dd49dfbc886b9dc99539cbf0e6',1,'tvm::runtime::profiling::MetricCollectorNode::Stop()'],['../classtvm_1_1runtime_1_1profiling_1_1Profiler.html#aa2000d8cd1970b5d29139ab1831394f0',1,'tvm::runtime::profiling::Profiler::Stop()'],['../structtvm_1_1relay_1_1ArangeAttrs.html#a1eadf1f3964ca83dad [...]
   ['stopcall',['StopCall',['../classtvm_1_1runtime_1_1profiling_1_1Profiler.html#ad5e6a8e8c9d915c80f494138eedfec3f',1,'tvm::runtime::profiling::Profiler']]],
-  ['storage',['Storage',['../classtvm_1_1runtime_1_1vm_1_1Storage.html',1,'tvm::runtime::vm::Storage'],['../classtvm_1_1runtime_1_1vm_1_1Storage.html#aff0c1264864e6205cfa468f069f62f55',1,'tvm::runtime::vm::Storage::Storage()'],['../structtvm_1_1runtime_1_1vm_1_1Instruction.html#a3412cabd3b4f42f106f56fc22257f6ca',1,'tvm::runtime::vm::Instruction::storage()']]],
+  ['storage',['Storage',['../classtvm_1_1runtime_1_1vm_1_1Storage.html',1,'tvm::runtime::vm::Storage'],['../structtvm_1_1runtime_1_1vm_1_1Instruction.html#a3412cabd3b4f42f106f56fc22257f6ca',1,'tvm::runtime::vm::Instruction::storage()'],['../classtvm_1_1runtime_1_1vm_1_1Storage.html#aff0c1264864e6205cfa468f069f62f55',1,'tvm::runtime::vm::Storage::Storage()']]],
   ['storage_5falign',['storage_align',['../classtvm_1_1auto__scheduler_1_1State.html#ab006690418e43cc9b7ad021c02657ed6',1,'tvm::auto_scheduler::State::storage_align()'],['../classtvm_1_1te_1_1Stage.html#aa73e3a269d84c3b4f0a1994371d67bab',1,'tvm::te::Stage::storage_align()']]],
   ['storage_5falignment',['storage_alignment',['../namespacetvm_1_1tir_1_1attr.html#af27d464f2065dc5f77408df7b94d4bb6',1,'tvm::tir::attr']]],
   ['storage_5fid',['storage_id',['../structTVMGraphExecutorGraphAttr.html#a8a0d6d05adcffbf499aafb6a6700c400',1,'TVMGraphExecutorGraphAttr']]],
@@ -367,7 +368,7 @@ var searchData=
   ['store',['Store',['../classtvm_1_1tir_1_1Store.html',1,'tvm::tir::Store'],['../classtvm_1_1tir_1_1Store.html#a2c4278b8bcdae57ada2022ecc7c290c3',1,'tvm::tir::Store::Store()']]],
   ['store_5fpredicate',['store_predicate',['../classtvm_1_1te_1_1StageNode.html#a8f4ba7f2931b3541c12734af511600a7',1,'tvm::te::StageNode']]],
   ['storenode',['StoreNode',['../classtvm_1_1tir_1_1StoreNode.html',1,'tvm::tir']]],
-  ['str',['str',['../classtvm_1_1TargetNode.html#a30cd67db46a9c4b098a8ba38fff22e26',1,'tvm::TargetNode::str()'],['../classtvm_1_1script_1_1printer_1_1LiteralDoc.html#a789d7d73bd4d94612fa2a84c16b26b89',1,'tvm::script::printer::LiteralDoc::Str()']]],
+  ['str',['Str',['../classtvm_1_1script_1_1printer_1_1LiteralDoc.html#a789d7d73bd4d94612fa2a84c16b26b89',1,'tvm::script::printer::LiteralDoc::Str()'],['../classtvm_1_1TargetNode.html#a30cd67db46a9c4b098a8ba38fff22e26',1,'tvm::TargetNode::str()']]],
   ['str2set',['Str2Set',['../namespacetvm_1_1topi.html#af01f6cc6b977801126083f0faffe252b',1,'tvm::topi']]],
   ['stream',['stream',['../classtvm_1_1ReprPrinter.html#a036409dcdcf6f0ac5c6d7d27ec60ed94',1,'tvm::ReprPrinter']]],
   ['streamsync',['StreamSync',['../classtvm_1_1runtime_1_1DeviceAPI.html#ac29b9295c432a87658392872c644864f',1,'tvm::runtime::DeviceAPI']]],
diff --git a/docs/reference/api/doxygen/search/all_15.js b/docs/reference/api/doxygen/search/all_15.js
index fe312f274..ed4f73bbd 100644
--- a/docs/reference/api/doxygen/search/all_15.js
+++ b/docs/reference/api/doxygen/search/all_15.js
@@ -35,16 +35,18 @@ var searchData=
   ['takeattrs',['TakeAttrs',['../structtvm_1_1relay_1_1TakeAttrs.html',1,'tvm::relay']]],
   ['tan',['tan',['../namespacetvm.html#af99838098788d40c80b402f29b3c2e8c',1,'tvm::tan()'],['../namespacetvm_1_1topi.html#a13b757fe52775f43a58d91c0a1330f97',1,'tvm::topi::tan()']]],
   ['tanh',['tanh',['../namespacetvm.html#a12c5457301d8a2c03a2ba1163edd7cee',1,'tvm::tanh()'],['../namespacetvm_1_1topi.html#aec153e599d33c78a7592007cde1c02cb',1,'tvm::topi::tanh()']]],
-  ['target',['Target',['../classtvm_1_1Target.html',1,'tvm::Target'],['../classtvm_1_1auto__scheduler_1_1SearchTaskNode.html#acf4407e0c8dced81b05b34ec0426c933',1,'tvm::auto_scheduler::SearchTaskNode::target()'],['../classtvm_1_1meta__schedule_1_1BuilderInputNode.html#afc001f3e427cfc8c05236b615cfd2868',1,'tvm::meta_schedule::BuilderInputNode::target()'],['../classtvm_1_1meta__schedule_1_1TuningRecordNode.html#a45a380cfa2edfd63056fb1a00f9aac35',1,'tvm::meta_schedule::TuningRecordNode::targ [...]
+  ['target',['Target',['../classtvm_1_1Target.html',1,'tvm::Target'],['../classtvm_1_1Target.html#a58a5a1e042e265fe5a6973045226fe1a',1,'tvm::Target::Target(std::nullptr_t)'],['../classtvm_1_1Target.html#a77f3d7cc97d8cfd7172af58b4e784d89',1,'tvm::Target::Target(const String &amp;tag_or_config_or_target_str)'],['../classtvm_1_1Target.html#ab825b350cf478bf948d807b6fdf636a0',1,'tvm::Target::Target(const Map&lt; String, ObjectRef &gt; &amp;config)'],['../classtvm_1_1Target.html#a1abb29217d8e3 [...]
   ['target_2eh',['target.h',['../target_8h.html',1,'']]],
   ['target_5fburst_5fbytes',['target_burst_bytes',['../structtvm_1_1PoolInfoNode.html#a747c03e3eafc83b053637b735244c6d7',1,'tvm::PoolInfoNode::target_burst_bytes()'],['../structtvm_1_1PoolInfoPropertiesNode.html#aa1efe29e920f5b003894a2ae3304da17',1,'tvm::PoolInfoPropertiesNode::target_burst_bytes()']]],
   ['target_5fhost',['target_host',['../classtvm_1_1auto__scheduler_1_1SearchTaskNode.html#afe27bf8cb82dc8a1b6fffb9e5a3e6c20',1,'tvm::auto_scheduler::SearchTaskNode']]],
   ['target_5finfo_2eh',['target_info.h',['../target__info_8h.html',1,'']]],
   ['target_5fiter_5fid',['target_iter_id',['../classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html#a5691967a42b989a54cf8c40c1627988e',1,'tvm::auto_scheduler::ComputeAtStepNode']]],
   ['target_5fkind_2eh',['target_kind.h',['../target__kind_8h.html',1,'']]],
+  ['target_5fparser',['target_parser',['../classtvm_1_1TargetKindNode.html#a713525ca63d41aacadec9db01d28f59f',1,'tvm::TargetKindNode']]],
   ['target_5fshape',['target_shape',['../structtvm_1_1relay_1_1AffineGridAttrs.html#a3e8a722c28015e4fa002da324fc6d5b3',1,'tvm::relay::AffineGridAttrs']]],
   ['target_5fstage_5fid',['target_stage_id',['../classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html#ad3c69aebb4b821c8e975ce0c58dc8bbb',1,'tvm::auto_scheduler::ComputeAtStepNode']]],
   ['targetinternal',['TargetInternal',['../classtvm_1_1TargetNode.html#a7924ccb2fdea6074cca1978c062fb034',1,'tvm::TargetNode::TargetInternal()'],['../classtvm_1_1Target.html#a7924ccb2fdea6074cca1978c062fb034',1,'tvm::Target::TargetInternal()'],['../classtvm_1_1TargetKindNode.html#a7924ccb2fdea6074cca1978c062fb034',1,'tvm::TargetKindNode::TargetInternal()'],['../classtvm_1_1TargetKind.html#a7924ccb2fdea6074cca1978c062fb034',1,'tvm::TargetKind::TargetInternal()']]],
+  ['targetjson',['TargetJSON',['../namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c',1,'tvm']]],
   ['targetkind',['TargetKind',['../classtvm_1_1TargetKind.html',1,'tvm::TargetKind'],['../classtvm_1_1TargetKindRegEntry.html#a75150485a300a03a22d9edad8619cc25',1,'tvm::TargetKindRegEntry::TargetKind()'],['../classtvm_1_1TargetKind.html#ae811f35863758fb8d1340b14df5cd5fb',1,'tvm::TargetKind::TargetKind()']]],
   ['targetkindattrmap',['TargetKindAttrMap',['../classtvm_1_1TargetKindAttrMap.html',1,'tvm::TargetKindAttrMap&lt; ValueType &gt;'],['../classtvm_1_1TargetKindAttrMap.html#abfd817be636e60822a33429ba30056bd',1,'tvm::TargetKindAttrMap::TargetKindAttrMap()']]],
   ['targetkindnode',['TargetKindNode',['../classtvm_1_1TargetKindNode.html',1,'tvm']]],
@@ -67,7 +69,7 @@ var searchData=
   ['te_5ffilter_5ffunc',['te_filter_func',['../classtvm_1_1meta__schedule_1_1ApplyHistoryBestNode.html#ac26042b7c3d559f7306a53b148860795',1,'tvm::meta_schedule::ApplyHistoryBestNode']]],
   ['tempexpr',['TempExpr',['../classtvm_1_1relay_1_1TempExpr.html',1,'tvm::relay']]],
   ['tempexprnode',['TempExprNode',['../classtvm_1_1relay_1_1TempExprNode.html',1,'tvm::relay']]],
-  ['tensor',['Tensor',['../classtvm_1_1te_1_1Tensor.html',1,'tvm::te::Tensor'],['../structtvm_1_1runtime_1_1vm_1_1Instruction.html#a22de469ea5521ba12e14f1e8181bae56',1,'tvm::runtime::vm::Instruction::tensor()'],['../classtvm_1_1te_1_1Tensor.html#afc8d8e74d1c840359661b39514d6fecf',1,'tvm::te::Tensor::Tensor()']]],
+  ['tensor',['Tensor',['../classtvm_1_1te_1_1Tensor.html',1,'tvm::te::Tensor'],['../classtvm_1_1te_1_1Tensor.html#afc8d8e74d1c840359661b39514d6fecf',1,'tvm::te::Tensor::Tensor()'],['../structtvm_1_1runtime_1_1vm_1_1Instruction.html#a22de469ea5521ba12e14f1e8181bae56',1,'tvm::runtime::vm::Instruction::tensor()']]],
   ['tensor_2eh',['tensor.h',['../tensor_8h.html',1,'']]],
   ['tensor_5fintrin',['tensor_intrin',['../classtvm_1_1te_1_1IterVarAttrNode.html#a6a0d96bbebfd716f851b2ad01738cb3f',1,'tvm::te::IterVarAttrNode']]],
   ['tensor_5fintrin_2eh',['tensor_intrin.h',['../tensor__intrin_8h.html',1,'']]],
@@ -81,11 +83,11 @@ var searchData=
   ['tensordom',['TensorDom',['../structtvm_1_1te_1_1TensorDom.html',1,'tvm::te::TensorDom'],['../structtvm_1_1te_1_1TensorDom.html#a1c1c057115d9dbeec11ff717bf18430b',1,'tvm::te::TensorDom::TensorDom()']]],
   ['tensordot',['tensordot',['../namespacetvm_1_1topi.html#abf2712c8265393c0582c9c7d5ae22da1',1,'tvm::topi::tensordot(const Tensor &amp;A, const tvm::te::Tensor &amp;B, int axes=2, std::string name=&quot;T_tensordot&quot;, std::string tag=kMatMul)'],['../namespacetvm_1_1topi.html#ac1a0190228326bbe8b98622c9696285e',1,'tvm::topi::tensordot(const Tensor &amp;A, const tvm::te::Tensor &amp;B, Array&lt; PrimExpr &gt; A_axes, Array&lt; PrimExpr &gt; B_axes, std::string name=&quot;T_tensordot&qu [...]
   ['tensorinfo',['TensorInfo',['../classtvm_1_1meta__schedule_1_1TensorInfo.html',1,'tvm::meta_schedule::TensorInfo'],['../classtvm_1_1runtime_1_1metadata_1_1TensorInfo.html',1,'tvm::runtime::metadata::TensorInfo'],['../classtvm_1_1meta__schedule_1_1TensorInfo.html#a4e465d00677302c2b6727f9caa248ca3',1,'tvm::meta_schedule::TensorInfo::TensorInfo()'],['../classtvm_1_1runtime_1_1metadata_1_1TensorInfo.html#a5150502c6ab08c7878538188939958d1',1,'tvm::runtime::metadata::TensorInfo::TensorInfo()']]],
-  ['tensorinfonode',['TensorInfoNode',['../classtvm_1_1meta__schedule_1_1TensorInfoNode.html',1,'tvm::meta_schedule::TensorInfoNode'],['../classtvm_1_1runtime_1_1metadata_1_1TensorInfoNode.html',1,'tvm::runtime::metadata::TensorInfoNode'],['../classtvm_1_1runtime_1_1metadata_1_1TensorInfoNode.html#a515ed589d62fb49976baabcaaffd106a',1,'tvm::runtime::metadata::TensorInfoNode::TensorInfoNode()']]],
+  ['tensorinfonode',['TensorInfoNode',['../classtvm_1_1runtime_1_1metadata_1_1TensorInfoNode.html',1,'tvm::runtime::metadata::TensorInfoNode'],['../classtvm_1_1meta__schedule_1_1TensorInfoNode.html',1,'tvm::meta_schedule::TensorInfoNode'],['../classtvm_1_1runtime_1_1metadata_1_1TensorInfoNode.html#a515ed589d62fb49976baabcaaffd106a',1,'tvm::runtime::metadata::TensorInfoNode::TensorInfoNode()']]],
   ['tensorintrin',['TensorIntrin',['../classtvm_1_1tir_1_1TensorIntrin.html',1,'tvm::tir::TensorIntrin'],['../classtvm_1_1te_1_1TensorIntrin.html',1,'tvm::te::TensorIntrin'],['../classtvm_1_1te_1_1TensorIntrin.html#a4ff4237911227bf80b3076906dc3b7ea',1,'tvm::te::TensorIntrin::TensorIntrin()'],['../classtvm_1_1tir_1_1TensorIntrin.html#af5a94c7b098b56056e02eaf187e6871c',1,'tvm::tir::TensorIntrin::TensorIntrin()']]],
   ['tensorintrincall',['TensorIntrinCall',['../classtvm_1_1te_1_1TensorIntrinCall.html',1,'tvm::te::TensorIntrinCall'],['../classtvm_1_1te_1_1TensorIntrinCall.html#a91c10074ce6babeba78fe72a0aab4b52',1,'tvm::te::TensorIntrinCall::TensorIntrinCall()']]],
   ['tensorintrincallnode',['TensorIntrinCallNode',['../classtvm_1_1te_1_1TensorIntrinCallNode.html',1,'tvm::te']]],
-  ['tensorintrinnode',['TensorIntrinNode',['../classtvm_1_1tir_1_1TensorIntrinNode.html',1,'tvm::tir::TensorIntrinNode'],['../classtvm_1_1te_1_1TensorIntrinNode.html',1,'tvm::te::TensorIntrinNode'],['../classtvm_1_1te_1_1TensorIntrinNode.html#ad59e7f2b881fc798a8c64fd3959f929c',1,'tvm::te::TensorIntrinNode::TensorIntrinNode()']]],
+  ['tensorintrinnode',['TensorIntrinNode',['../classtvm_1_1te_1_1TensorIntrinNode.html',1,'tvm::te::TensorIntrinNode'],['../classtvm_1_1tir_1_1TensorIntrinNode.html',1,'tvm::tir::TensorIntrinNode'],['../classtvm_1_1te_1_1TensorIntrinNode.html#ad59e7f2b881fc798a8c64fd3959f929c',1,'tvm::te::TensorIntrinNode::TensorIntrinNode()']]],
   ['tensorize',['Tensorize',['../classtvm_1_1tir_1_1ScheduleNode.html#ae3794a03b566e5b1721b44c564992975',1,'tvm::tir::ScheduleNode::Tensorize(const LoopRV &amp;loop_rv, const String &amp;intrin)=0'],['../classtvm_1_1tir_1_1ScheduleNode.html#aaca1621ab9c3db0ddd04ac57de79d37f',1,'tvm::tir::ScheduleNode::Tensorize(const BlockRV &amp;block_rv, const String &amp;intrin)=0'],['../classtvm_1_1te_1_1Stage.html#ab5fe485e1d730c36b096c060b8d2ef9d',1,'tvm::te::Stage::tensorize()']]],
   ['tensornode',['TensorNode',['../classtvm_1_1te_1_1TensorNode.html',1,'tvm::te::TensorNode'],['../classtvm_1_1te_1_1TensorNode.html#a153569448cb1bf9d2924d35639c3b8b8',1,'tvm::te::TensorNode::TensorNode()']]],
   ['tensors',['tensors',['../classtvm_1_1auto__scheduler_1_1ComputeDAGNode.html#afc71b9ecc0d6b82a5c2ab3250f01514b',1,'tvm::auto_scheduler::ComputeDAGNode::tensors()'],['../classtvm_1_1te_1_1TensorIntrinCallNode.html#a92b543750ea55b9cfd6852139e2ddbd6',1,'tvm::te::TensorIntrinCallNode::tensors()']]],
@@ -501,6 +503,7 @@ var searchData=
   ['typedpackedfunc_3c_20runnerresult_28_29_3e',['TypedPackedFunc&lt; RunnerResult()&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
   ['typedpackedfunc_3c_20string_28_29_3e',['TypedPackedFunc&lt; String()&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
   ['typedpackedfunc_3c_20string_28const_20array_3c_20objectref_20_3e_20_26inputs_2c_20const_20array_3c_20objectref_20_3e_20_26attrs_2c_20const_20optional_3c_20objectref_20_3e_20_26decision_2c_20const_20array_3c_20string_20_3e_20_26outputs_29_3e',['TypedPackedFunc&lt; String(const Array&lt; ObjectRef &gt; &amp;inputs, const Array&lt; ObjectRef &gt; &amp;attrs, const Optional&lt; ObjectRef &gt; &amp;decision, const Array&lt; String &gt; &amp;outputs)&gt;',['../classtvm_1_1runtime_1_1TypedP [...]
+  ['typedpackedfunc_3c_20targetjson_28targetjson_29_3e',['TypedPackedFunc&lt; TargetJSON(TargetJSON)&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
   ['typedpackedfunc_3c_20te_3a_3aschedule_28const_20attrs_20_26attrs_2c_20const_20array_3c_20te_3a_3atensor_20_3e_20_26outs_2c_20const_20target_20_26target_29_3e',['TypedPackedFunc&lt; te::Schedule(const Attrs &amp;attrs, const Array&lt; te::Tensor &gt; &amp;outs, const Target &amp;target)&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
   ['typedpackedfunc_3c_20void_28_29_3e',['TypedPackedFunc&lt; void()&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
   ['typedpackedfunc_3c_20void_28const_20array_3c_20measurecandidate_20_3e_20_26_2c_20const_20array_3c_20runnerresult_20_3e_20_26_29_3e',['TypedPackedFunc&lt; void(const Array&lt; MeasureCandidate &gt; &amp;, const Array&lt; RunnerResult &gt; &amp;)&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
diff --git a/docs/reference/api/doxygen/search/all_16.js b/docs/reference/api/doxygen/search/all_16.js
index c2b566046..9ec69b1a4 100644
--- a/docs/reference/api/doxygen/search/all_16.js
+++ b/docs/reference/api/doxygen/search/all_16.js
@@ -15,7 +15,7 @@ var searchData=
   ['unionregion',['UnionRegion',['../namespacetvm_1_1arith.html#ad27c4f216e41eb8e81296fb7ec4b9453',1,'tvm::arith']]],
   ['unionregionlowerbound',['UnionRegionLowerBound',['../namespacetvm_1_1arith.html#a4c3dedfa4cba4ad39c953eb51eb83e4d',1,'tvm::arith']]],
   ['unipolar',['unipolar',['../structtvm_1_1relay_1_1BinaryConv2DAttrs.html#a7e0ad68dce226079b769a678aa01dc49',1,'tvm::relay::BinaryConv2DAttrs::unipolar()'],['../structtvm_1_1relay_1_1BinaryDenseAttrs.html#af21cdb9dac67ab9ecea5a19642658d8a',1,'tvm::relay::BinaryDenseAttrs::unipolar()']]],
-  ['unique',['unique',['../classtvm_1_1runtime_1_1Object.html#afd548730a6139d19fe24473ad66026d7',1,'tvm::runtime::Object::unique()'],['../classtvm_1_1runtime_1_1ObjectPtr.html#af95c6c6fcd89da0f62b93f1167b72314',1,'tvm::runtime::ObjectPtr::unique()'],['../classtvm_1_1runtime_1_1ObjectRef.html#a4e7cdb1574b93a59e784d70aa47b8da7',1,'tvm::runtime::ObjectRef::unique()'],['../classtvm_1_1VirtualDeviceCache.html#a25ba1351484aa58a2cc7cef8f8e4423c',1,'tvm::VirtualDeviceCache::Unique()']]],
+  ['unique',['Unique',['../classtvm_1_1VirtualDeviceCache.html#a25ba1351484aa58a2cc7cef8f8e4423c',1,'tvm::VirtualDeviceCache::Unique()'],['../classtvm_1_1runtime_1_1Object.html#afd548730a6139d19fe24473ad66026d7',1,'tvm::runtime::Object::unique()'],['../classtvm_1_1runtime_1_1ObjectPtr.html#af95c6c6fcd89da0f62b93f1167b72314',1,'tvm::runtime::ObjectPtr::unique()'],['../classtvm_1_1runtime_1_1ObjectRef.html#a4e7cdb1574b93a59e784d70aa47b8da7',1,'tvm::runtime::ObjectRef::unique()']]],
   ['uniqueattrs',['UniqueAttrs',['../structtvm_1_1relay_1_1UniqueAttrs.html',1,'tvm::relay']]],
   ['unit_5fbits',['unit_bits',['../classtvm_1_1MemoryInfoNode.html#aa935f1ee9d8d2f06633ca4b3c44f7725',1,'tvm::MemoryInfoNode']]],
   ['units',['units',['../structtvm_1_1relay_1_1BinaryDenseAttrs.html#a5373b2f2aac19653ae21aec74c69cdb0',1,'tvm::relay::BinaryDenseAttrs::units()'],['../structtvm_1_1relay_1_1MatmulAttrs.html#a5893df9ad99c6717c4e6cb440d60c6a1',1,'tvm::relay::MatmulAttrs::units()'],['../structtvm_1_1relay_1_1DenseAttrs.html#a497487f7ccced8c7492a5ed03f78fa8f',1,'tvm::relay::DenseAttrs::units()'],['../structtvm_1_1relay_1_1DensePackAttrs.html#aa0096c26c832166de13881a032ba3fbf',1,'tvm::relay::DensePackAttrs:: [...]
diff --git a/docs/reference/api/doxygen/search/all_17.js b/docs/reference/api/doxygen/search/all_17.js
index 1bbdd4bcf..f4428e833 100644
--- a/docs/reference/api/doxygen/search/all_17.js
+++ b/docs/reference/api/doxygen/search/all_17.js
@@ -18,7 +18,7 @@ var searchData=
   ['values',['values',['../structTVMArgs.html#ab2d2a8c794bc11fdb56b294e711ff63c',1,'TVMArgs::values()'],['../classtvm_1_1runtime_1_1TVMArgs.html#a3b99059e2f1ad08c99b42b5bee82752f',1,'tvm::runtime::TVMArgs::values()']]],
   ['values_5fcount',['values_count',['../structTVMArgs.html#afa042427dc87d770d0def130f497f3f5',1,'TVMArgs']]],
   ['valuetypeinfomaker',['ValueTypeInfoMaker',['../structtvm_1_1detail_1_1ValueTypeInfoMaker.html',1,'tvm::detail']]],
-  ['var',['Var',['../classtvm_1_1relay_1_1Var.html',1,'tvm::relay::Var'],['../classtvm_1_1tir_1_1Var.html',1,'tvm::tir::Var'],['../classtvm_1_1relay_1_1Var.html#a06ef8ae1d07a5b8a3c25ca7775d17762',1,'tvm::relay::Var::Var(String name_hint, Type type_annotation, Span span=Span())'],['../classtvm_1_1relay_1_1Var.html#a45372a62057ee9332a391e29845505ff',1,'tvm::relay::Var::Var(Id vid, Type type_annotation, Span span=Span())'],['../classtvm_1_1tir_1_1Var.html#a21ba7568a83bfc2a5896f9e0ff181129', [...]
+  ['var',['Var',['../classtvm_1_1tir_1_1Var.html',1,'tvm::tir::Var'],['../classtvm_1_1relay_1_1Var.html',1,'tvm::relay::Var'],['../classtvm_1_1relay_1_1PatternVarNode.html#acfa1269806fbf19e7badd424c19c64bf',1,'tvm::relay::PatternVarNode::var()'],['../classtvm_1_1relay_1_1LetPatternNode.html#aff0c6dec182a3173fe0cb601a5b74ed1',1,'tvm::relay::LetPatternNode::var()'],['../classtvm_1_1relay_1_1LetNode.html#a3a841faeeb71a7e59f99feffb182a7c1',1,'tvm::relay::LetNode::var()'],['../classtvm_1_1tir [...]
   ['var_2eh',['var.h',['../var_8h.html',1,'']]],
   ['variables',['variables',['../classtvm_1_1arith_1_1IntConstraintsNode.html#adecd62b78ba2a3fc57778088ff641cf6',1,'tvm::arith::IntConstraintsNode']]],
   ['varianceattrs',['VarianceAttrs',['../structtvm_1_1relay_1_1VarianceAttrs.html',1,'tvm::relay']]],
@@ -30,7 +30,7 @@ var searchData=
   ['vector_5funit_5fbytes',['vector_unit_bytes',['../classtvm_1_1auto__scheduler_1_1HardwareParamsNode.html#a6f2dd9161fdb3233417a9912c8854434',1,'tvm::auto_scheduler::HardwareParamsNode']]],
   ['vectorcombine',['vectorcombine',['../namespacetvm_1_1tir_1_1builtin.html#a30dff65bc2c142b57fae7f60e378ff43',1,'tvm::tir::builtin']]],
   ['vectorhigh',['vectorhigh',['../namespacetvm_1_1tir_1_1builtin.html#a45bf65ca7ca01d2016e0b609117d7e25',1,'tvm::tir::builtin']]],
-  ['vectorize',['Vectorize',['../classtvm_1_1tir_1_1ScheduleNode.html#ab4a8cd91959ceab22855ec338978bcee',1,'tvm::tir::ScheduleNode::Vectorize()'],['../classtvm_1_1auto__scheduler_1_1State.html#a97b8a21210d63bea241dbab085d89b53',1,'tvm::auto_scheduler::State::vectorize()'],['../classtvm_1_1te_1_1Stage.html#a44d33e3920106e75dc7c68272f880812',1,'tvm::te::Stage::vectorize()']]],
+  ['vectorize',['vectorize',['../classtvm_1_1auto__scheduler_1_1State.html#a97b8a21210d63bea241dbab085d89b53',1,'tvm::auto_scheduler::State::vectorize()'],['../classtvm_1_1te_1_1Stage.html#a44d33e3920106e75dc7c68272f880812',1,'tvm::te::Stage::vectorize()'],['../classtvm_1_1tir_1_1ScheduleNode.html#ab4a8cd91959ceab22855ec338978bcee',1,'tvm::tir::ScheduleNode::Vectorize()']]],
   ['vectorizeloop',['VectorizeLoop',['../namespacetvm_1_1tir_1_1transform.html#af3cecb50a8b8fc8021f6a87bc27587da',1,'tvm::tir::transform']]],
   ['vectorizer',['Vectorizer',['../classtvm_1_1tir_1_1BufferLoadNode.html#a842a72b9d02a9f8541b512478932fece',1,'tvm::tir::BufferLoadNode']]],
   ['vectorjacobianproduct',['VectorJacobianProduct',['../namespacetvm_1_1te.html#a547183f5a311af53ab598faba423fd64',1,'tvm::te']]],
diff --git a/docs/reference/api/doxygen/search/all_7.js b/docs/reference/api/doxygen/search/all_7.js
index b6b950f6f..e3a7df2cb 100644
--- a/docs/reference/api/doxygen/search/all_7.js
+++ b/docs/reference/api/doxygen/search/all_7.js
@@ -237,6 +237,7 @@ var searchData=
   ['ftvmrelaytotir',['FTVMRelayToTIR',['../namespacetvm.html#a3b1103f53a837ff14dc583e1c0b6b898',1,'tvm']]],
   ['ftvmschedule',['FTVMSchedule',['../namespacetvm_1_1relay.html#aff3d3515294c3128bb63a75c5d30f596',1,'tvm::relay']]],
   ['ftvmstrategy',['FTVMStrategy',['../namespacetvm_1_1relay.html#a2eb2ad4e7a83e0a28e2ad073d7bf9305',1,'tvm::relay']]],
+  ['ftvmtargetparser',['FTVMTargetParser',['../namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3',1,'tvm']]],
   ['ftvmtirtoruntime',['FTVMTIRToRuntime',['../namespacetvm.html#ade89c3d682d83c2e14ec9337084541e3',1,'tvm']]],
   ['ftype',['FType',['../classtvm_1_1ReprPrinter.html#aee843a9a8b4885f5797843bed6467415',1,'tvm::ReprPrinter']]],
   ['full',['full',['../namespacetvm_1_1topi.html#a5c22ce14da6f5589de129861bb06da78',1,'tvm::topi']]],
diff --git a/docs/reference/api/doxygen/search/all_9.js b/docs/reference/api/doxygen/search/all_9.js
index fb2a26248..99f4fa0e8 100644
--- a/docs/reference/api/doxygen/search/all_9.js
+++ b/docs/reference/api/doxygen/search/all_9.js
@@ -2,7 +2,7 @@ var searchData=
 [
   ['hand_5fthreaded',['hand_threaded',['../namespacetvm_1_1tir_1_1attr.html#a0d026645d3f86d9cc2e693fa232fddec',1,'tvm::tir::attr']]],
   ['handle',['Handle',['../classtvm_1_1runtime_1_1DataType.html#aebad9f7235dd20af649fb5c2113797b8',1,'tvm::runtime::DataType']]],
-  ['handler',['Handler',['../classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html',1,'tvm::runtime::SimpleObjAllocator::Handler&lt; T &gt;'],['../classtvm_1_1SEqualReducer_1_1Handler.html',1,'tvm::SEqualReducer::Handler'],['../classtvm_1_1SHashReducer_1_1Handler.html',1,'tvm::SHashReducer::Handler']]],
+  ['handler',['Handler',['../classtvm_1_1SEqualReducer_1_1Handler.html',1,'tvm::SEqualReducer::Handler'],['../classtvm_1_1SHashReducer_1_1Handler.html',1,'tvm::SHashReducer::Handler'],['../classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html',1,'tvm::runtime::SimpleObjAllocator::Handler&lt; T &gt;']]],
   ['handler_3c_20dldatatype_20_3e',['Handler&lt; DLDataType &gt;',['../structdmlc_1_1serializer_1_1Handler_3_01DLDataType_01_4.html',1,'dmlc::serializer']]],
   ['handler_3c_20dldevice_20_3e',['Handler&lt; DLDevice &gt;',['../structdmlc_1_1serializer_1_1Handler_3_01DLDevice_01_4.html',1,'dmlc::serializer']]],
   ['hardware_5fparams',['hardware_params',['../classtvm_1_1auto__scheduler_1_1SearchTaskNode.html#a8f71d815c5608317a1cdba5b4303df12',1,'tvm::auto_scheduler::SearchTaskNode']]],
diff --git a/docs/reference/api/doxygen/search/all_d.js b/docs/reference/api/doxygen/search/all_d.js
index 407f4cab6..b4277bb8b 100644
--- a/docs/reference/api/doxygen/search/all_d.js
+++ b/docs/reference/api/doxygen/search/all_d.js
@@ -29,7 +29,7 @@ var searchData=
   ['left_5fshift',['left_shift',['../namespacetvm.html#ad4fceb4266c6e7644fa373eacf73359f',1,'tvm::left_shift(PrimExpr a, PrimExpr b, Span span=Span())'],['../namespacetvm.html#a6bc108896d74f5f3b5cc3b98e9780e1c',1,'tvm::left_shift(const PrimExpr &amp;a, int b, Span span=Span())'],['../namespacetvm.html#a58fbf68a58a7f32935d6c4539d292a08',1,'tvm::left_shift(int a, const PrimExpr &amp;b, Span span=Span())'],['../namespacetvm_1_1topi.html#a46057526edd4bbd0a291edf7f0c863b4',1,'tvm::topi::left_ [...]
   ['legalize',['Legalize',['../namespacetvm_1_1relay_1_1qnn_1_1transform.html#a2323e3c38cc9ae1626cd98295b83e906',1,'tvm::relay::qnn::transform::Legalize()'],['../namespacetvm_1_1relay_1_1transform.html#aae623a28eda64b60c6ee90edde103891',1,'tvm::relay::transform::Legalize()']]],
   ['legalizepackedcalls',['LegalizePackedCalls',['../namespacetvm_1_1tir_1_1transform.html#acc84aad38c2cacfae333c8970ece933d',1,'tvm::tir::transform']]],
-  ['length',['Length',['../classtvm_1_1ObjectPathNode.html#a7fb8c3f4a9510e3893cb04bd045f93db',1,'tvm::ObjectPathNode::Length()'],['../classtvm_1_1runtime_1_1String.html#a812045bfefb0f80f850d1b39f0e40489',1,'tvm::runtime::String::length()']]],
+  ['length',['length',['../classtvm_1_1runtime_1_1String.html#a812045bfefb0f80f850d1b39f0e40489',1,'tvm::runtime::String::length()'],['../classtvm_1_1ObjectPathNode.html#a7fb8c3f4a9510e3893cb04bd045f93db',1,'tvm::ObjectPathNode::Length()']]],
   ['lengths',['lengths',['../classtvm_1_1auto__scheduler_1_1SplitStepNode.html#aac7336b4924fc50da9178c271f1cb2ed',1,'tvm::auto_scheduler::SplitStepNode']]],
   ['lenode',['LENode',['../structtvm_1_1tir_1_1LENode.html',1,'tvm::tir']]],
   ['less',['less',['../namespacetvm.html#a52fa1dc57423a077eb098960162e7b85',1,'tvm::less(PrimExpr a, PrimExpr b, Span span=Span())'],['../namespacetvm.html#ab69bb15adb82d166504b9cbeef21a4dc',1,'tvm::less(const PrimExpr &amp;a, float b, Span span=Span())'],['../namespacetvm.html#a0d5dc442dde0e69657c40803d394ea73',1,'tvm::less(float a, const PrimExpr &amp;b, Span span=Span())'],['../namespacetvm.html#a042ab9c595a8a0f63c07dbbdf75ecf9c',1,'tvm::less(int a, const PrimExpr &amp;b, Span span=Sp [...]
diff --git a/docs/reference/api/doxygen/search/all_e.js b/docs/reference/api/doxygen/search/all_e.js
index e23e5c309..e66ccde8d 100644
--- a/docs/reference/api/doxygen/search/all_e.js
+++ b/docs/reference/api/doxygen/search/all_e.js
@@ -90,7 +90,7 @@ var searchData=
   ['measure_5fcandidate_2eh',['measure_candidate.h',['../measure__candidate_8h.html',1,'']]],
   ['measure_5fcandidates',['measure_candidates',['../classtvm_1_1meta__schedule_1_1TuneContextNode.html#a85697ab529d4e1aab8b76f051544c638',1,'tvm::meta_schedule::TuneContextNode']]],
   ['measure_5frecord_2eh',['measure_record.h',['../measure__record_8h.html',1,'']]],
-  ['measurecallback',['MeasureCallback',['../classtvm_1_1meta__schedule_1_1MeasureCallback.html',1,'tvm::meta_schedule::MeasureCallback'],['../classtvm_1_1auto__scheduler_1_1MeasureCallback.html',1,'tvm::auto_scheduler::MeasureCallback']]],
+  ['measurecallback',['MeasureCallback',['../classtvm_1_1auto__scheduler_1_1MeasureCallback.html',1,'tvm::auto_scheduler::MeasureCallback'],['../classtvm_1_1meta__schedule_1_1MeasureCallback.html',1,'tvm::meta_schedule::MeasureCallback']]],
   ['measurecallbacknode',['MeasureCallbackNode',['../classtvm_1_1meta__schedule_1_1MeasureCallbackNode.html',1,'tvm::meta_schedule::MeasureCallbackNode'],['../classtvm_1_1auto__scheduler_1_1MeasureCallbackNode.html',1,'tvm::auto_scheduler::MeasureCallbackNode']]],
   ['measurecandidate',['MeasureCandidate',['../classtvm_1_1meta__schedule_1_1MeasureCandidate.html',1,'tvm::meta_schedule::MeasureCandidate'],['../classtvm_1_1meta__schedule_1_1MeasureCandidate.html#a76d9f7a93cc12fee81d48024bb0b8a39',1,'tvm::meta_schedule::MeasureCandidate::MeasureCandidate()']]],
   ['measurecandidatenode',['MeasureCandidateNode',['../classtvm_1_1meta__schedule_1_1MeasureCandidateNode.html',1,'tvm::meta_schedule']]],
diff --git a/docs/reference/api/doxygen/search/classes_10.js b/docs/reference/api/doxygen/search/classes_10.js
index 5565cbd79..ace450912 100644
--- a/docs/reference/api/doxygen/search/classes_10.js
+++ b/docs/reference/api/doxygen/search/classes_10.js
@@ -7,7 +7,7 @@ var searchData=
   ['scatterattrs',['ScatterAttrs',['../structtvm_1_1relay_1_1ScatterAttrs.html',1,'tvm::relay']]],
   ['scatterndattrs',['ScatterNDAttrs',['../structtvm_1_1relay_1_1ScatterNDAttrs.html',1,'tvm::relay']]],
   ['schedule',['Schedule',['../classtvm_1_1te_1_1Schedule.html',1,'tvm::te::Schedule'],['../classtvm_1_1tir_1_1Schedule.html',1,'tvm::tir::Schedule']]],
-  ['schedulenode',['ScheduleNode',['../classtvm_1_1te_1_1ScheduleNode.html',1,'tvm::te::ScheduleNode'],['../classtvm_1_1tir_1_1ScheduleNode.html',1,'tvm::tir::ScheduleNode']]],
+  ['schedulenode',['ScheduleNode',['../classtvm_1_1tir_1_1ScheduleNode.html',1,'tvm::tir::ScheduleNode'],['../classtvm_1_1te_1_1ScheduleNode.html',1,'tvm::te::ScheduleNode']]],
   ['schedulerule',['ScheduleRule',['../classtvm_1_1meta__schedule_1_1ScheduleRule.html',1,'tvm::meta_schedule']]],
   ['schedulerulenode',['ScheduleRuleNode',['../classtvm_1_1meta__schedule_1_1ScheduleRuleNode.html',1,'tvm::meta_schedule']]],
   ['schedulestate',['ScheduleState',['../classtvm_1_1tir_1_1ScheduleState.html',1,'tvm::tir']]],
@@ -84,9 +84,9 @@ var searchData=
   ['splitstepnode',['SplitStepNode',['../classtvm_1_1auto__scheduler_1_1SplitStepNode.html',1,'tvm::auto_scheduler']]],
   ['squeezeattrs',['SqueezeAttrs',['../structtvm_1_1relay_1_1SqueezeAttrs.html',1,'tvm::relay']]],
   ['stackattrs',['StackAttrs',['../structtvm_1_1relay_1_1StackAttrs.html',1,'tvm::relay']]],
-  ['stage',['Stage',['../classtvm_1_1auto__scheduler_1_1Stage.html',1,'tvm::auto_scheduler::Stage'],['../classtvm_1_1te_1_1Stage.html',1,'tvm::te::Stage']]],
+  ['stage',['Stage',['../classtvm_1_1te_1_1Stage.html',1,'tvm::te::Stage'],['../classtvm_1_1auto__scheduler_1_1Stage.html',1,'tvm::auto_scheduler::Stage']]],
   ['stageattributes',['StageAttributes',['../structtvm_1_1auto__scheduler_1_1StageAttributes.html',1,'tvm::auto_scheduler']]],
-  ['stagenode',['StageNode',['../classtvm_1_1auto__scheduler_1_1StageNode.html',1,'tvm::auto_scheduler::StageNode'],['../classtvm_1_1te_1_1StageNode.html',1,'tvm::te::StageNode']]],
+  ['stagenode',['StageNode',['../classtvm_1_1te_1_1StageNode.html',1,'tvm::te::StageNode'],['../classtvm_1_1auto__scheduler_1_1StageNode.html',1,'tvm::auto_scheduler::StageNode']]],
   ['state',['State',['../classtvm_1_1auto__scheduler_1_1State.html',1,'tvm::auto_scheduler']]],
   ['statenode',['StateNode',['../classtvm_1_1auto__scheduler_1_1StateNode.html',1,'tvm::auto_scheduler']]],
   ['step',['Step',['../classtvm_1_1auto__scheduler_1_1Step.html',1,'tvm::auto_scheduler']]],
diff --git a/docs/reference/api/doxygen/search/classes_11.js b/docs/reference/api/doxygen/search/classes_11.js
index 0f0417965..99fc705ce 100644
--- a/docs/reference/api/doxygen/search/classes_11.js
+++ b/docs/reference/api/doxygen/search/classes_11.js
@@ -21,11 +21,11 @@ var searchData=
   ['tensorcomputeopnode',['TensorComputeOpNode',['../classtvm_1_1te_1_1TensorComputeOpNode.html',1,'tvm::te']]],
   ['tensordom',['TensorDom',['../structtvm_1_1te_1_1TensorDom.html',1,'tvm::te']]],
   ['tensorinfo',['TensorInfo',['../classtvm_1_1meta__schedule_1_1TensorInfo.html',1,'tvm::meta_schedule::TensorInfo'],['../classtvm_1_1runtime_1_1metadata_1_1TensorInfo.html',1,'tvm::runtime::metadata::TensorInfo']]],
-  ['tensorinfonode',['TensorInfoNode',['../classtvm_1_1meta__schedule_1_1TensorInfoNode.html',1,'tvm::meta_schedule::TensorInfoNode'],['../classtvm_1_1runtime_1_1metadata_1_1TensorInfoNode.html',1,'tvm::runtime::metadata::TensorInfoNode']]],
+  ['tensorinfonode',['TensorInfoNode',['../classtvm_1_1runtime_1_1metadata_1_1TensorInfoNode.html',1,'tvm::runtime::metadata::TensorInfoNode'],['../classtvm_1_1meta__schedule_1_1TensorInfoNode.html',1,'tvm::meta_schedule::TensorInfoNode']]],
   ['tensorintrin',['TensorIntrin',['../classtvm_1_1tir_1_1TensorIntrin.html',1,'tvm::tir::TensorIntrin'],['../classtvm_1_1te_1_1TensorIntrin.html',1,'tvm::te::TensorIntrin']]],
   ['tensorintrincall',['TensorIntrinCall',['../classtvm_1_1te_1_1TensorIntrinCall.html',1,'tvm::te']]],
   ['tensorintrincallnode',['TensorIntrinCallNode',['../classtvm_1_1te_1_1TensorIntrinCallNode.html',1,'tvm::te']]],
-  ['tensorintrinnode',['TensorIntrinNode',['../classtvm_1_1tir_1_1TensorIntrinNode.html',1,'tvm::tir::TensorIntrinNode'],['../classtvm_1_1te_1_1TensorIntrinNode.html',1,'tvm::te::TensorIntrinNode']]],
+  ['tensorintrinnode',['TensorIntrinNode',['../classtvm_1_1te_1_1TensorIntrinNode.html',1,'tvm::te::TensorIntrinNode'],['../classtvm_1_1tir_1_1TensorIntrinNode.html',1,'tvm::tir::TensorIntrinNode']]],
   ['tensornode',['TensorNode',['../classtvm_1_1te_1_1TensorNode.html',1,'tvm::te']]],
   ['tensortype',['TensorType',['../classtvm_1_1TensorType.html',1,'tvm']]],
   ['tensortypenode',['TensorTypeNode',['../classtvm_1_1TensorTypeNode.html',1,'tvm']]],
@@ -112,6 +112,7 @@ var searchData=
   ['typedpackedfunc_3c_20runnerresult_28_29_3e',['TypedPackedFunc&lt; RunnerResult()&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
   ['typedpackedfunc_3c_20string_28_29_3e',['TypedPackedFunc&lt; String()&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
   ['typedpackedfunc_3c_20string_28const_20array_3c_20objectref_20_3e_20_26inputs_2c_20const_20array_3c_20objectref_20_3e_20_26attrs_2c_20const_20optional_3c_20objectref_20_3e_20_26decision_2c_20const_20array_3c_20string_20_3e_20_26outputs_29_3e',['TypedPackedFunc&lt; String(const Array&lt; ObjectRef &gt; &amp;inputs, const Array&lt; ObjectRef &gt; &amp;attrs, const Optional&lt; ObjectRef &gt; &amp;decision, const Array&lt; String &gt; &amp;outputs)&gt;',['../classtvm_1_1runtime_1_1TypedP [...]
+  ['typedpackedfunc_3c_20targetjson_28targetjson_29_3e',['TypedPackedFunc&lt; TargetJSON(TargetJSON)&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
   ['typedpackedfunc_3c_20te_3a_3aschedule_28const_20attrs_20_26attrs_2c_20const_20array_3c_20te_3a_3atensor_20_3e_20_26outs_2c_20const_20target_20_26target_29_3e',['TypedPackedFunc&lt; te::Schedule(const Attrs &amp;attrs, const Array&lt; te::Tensor &gt; &amp;outs, const Target &amp;target)&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
   ['typedpackedfunc_3c_20void_28_29_3e',['TypedPackedFunc&lt; void()&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
   ['typedpackedfunc_3c_20void_28const_20array_3c_20measurecandidate_20_3e_20_26_2c_20const_20array_3c_20runnerresult_20_3e_20_26_29_3e',['TypedPackedFunc&lt; void(const Array&lt; MeasureCandidate &gt; &amp;, const Array&lt; RunnerResult &gt; &amp;)&gt;',['../classtvm_1_1runtime_1_1TypedPackedFunc.html',1,'tvm::runtime']]],
diff --git a/docs/reference/api/doxygen/search/classes_13.js b/docs/reference/api/doxygen/search/classes_13.js
index 71edf12b8..72ce4730e 100644
--- a/docs/reference/api/doxygen/search/classes_13.js
+++ b/docs/reference/api/doxygen/search/classes_13.js
@@ -3,7 +3,7 @@ var searchData=
   ['v_5finfo',['v_info',['../structtvm_1_1relay_1_1v__info.html',1,'tvm::relay']]],
   ['valueconverter',['ValueConverter',['../structtvm_1_1runtime_1_1Array_1_1ValueConverter.html',1,'tvm::runtime::Array']]],
   ['valuetypeinfomaker',['ValueTypeInfoMaker',['../structtvm_1_1detail_1_1ValueTypeInfoMaker.html',1,'tvm::detail']]],
-  ['var',['Var',['../classtvm_1_1relay_1_1Var.html',1,'tvm::relay::Var'],['../classtvm_1_1tir_1_1Var.html',1,'tvm::tir::Var']]],
+  ['var',['Var',['../classtvm_1_1tir_1_1Var.html',1,'tvm::tir::Var'],['../classtvm_1_1relay_1_1Var.html',1,'tvm::relay::Var']]],
   ['varianceattrs',['VarianceAttrs',['../structtvm_1_1relay_1_1VarianceAttrs.html',1,'tvm::relay']]],
   ['varnode',['VarNode',['../classtvm_1_1tir_1_1VarNode.html',1,'tvm::tir::VarNode'],['../classtvm_1_1relay_1_1VarNode.html',1,'tvm::relay::VarNode']]],
   ['varpattern',['VarPattern',['../classtvm_1_1relay_1_1VarPattern.html',1,'tvm::relay']]],
diff --git a/docs/reference/api/doxygen/search/classes_7.js b/docs/reference/api/doxygen/search/classes_7.js
index ee917209f..2e5d00a7e 100644
--- a/docs/reference/api/doxygen/search/classes_7.js
+++ b/docs/reference/api/doxygen/search/classes_7.js
@@ -1,6 +1,6 @@
 var searchData=
 [
-  ['handler',['Handler',['../classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html',1,'tvm::runtime::SimpleObjAllocator::Handler&lt; T &gt;'],['../classtvm_1_1SEqualReducer_1_1Handler.html',1,'tvm::SEqualReducer::Handler'],['../classtvm_1_1SHashReducer_1_1Handler.html',1,'tvm::SHashReducer::Handler']]],
+  ['handler',['Handler',['../classtvm_1_1SEqualReducer_1_1Handler.html',1,'tvm::SEqualReducer::Handler'],['../classtvm_1_1SHashReducer_1_1Handler.html',1,'tvm::SHashReducer::Handler'],['../classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html',1,'tvm::runtime::SimpleObjAllocator::Handler&lt; T &gt;']]],
   ['handler_3c_20dldatatype_20_3e',['Handler&lt; DLDataType &gt;',['../structdmlc_1_1serializer_1_1Handler_3_01DLDataType_01_4.html',1,'dmlc::serializer']]],
   ['handler_3c_20dldevice_20_3e',['Handler&lt; DLDevice &gt;',['../structdmlc_1_1serializer_1_1Handler_3_01DLDevice_01_4.html',1,'dmlc::serializer']]],
   ['hardwareparams',['HardwareParams',['../classtvm_1_1auto__scheduler_1_1HardwareParams.html',1,'tvm::auto_scheduler']]],
diff --git a/docs/reference/api/doxygen/search/classes_a.js b/docs/reference/api/doxygen/search/classes_a.js
index 3b8a40ee7..4236c3730 100644
--- a/docs/reference/api/doxygen/search/classes_a.js
+++ b/docs/reference/api/doxygen/search/classes_a.js
@@ -32,7 +32,7 @@ var searchData=
   ['maxpool1dattrs',['MaxPool1DAttrs',['../structtvm_1_1relay_1_1MaxPool1DAttrs.html',1,'tvm::relay']]],
   ['maxpool2dattrs',['MaxPool2DAttrs',['../structtvm_1_1relay_1_1MaxPool2DAttrs.html',1,'tvm::relay']]],
   ['maxpool3dattrs',['MaxPool3DAttrs',['../structtvm_1_1relay_1_1MaxPool3DAttrs.html',1,'tvm::relay']]],
-  ['measurecallback',['MeasureCallback',['../classtvm_1_1meta__schedule_1_1MeasureCallback.html',1,'tvm::meta_schedule::MeasureCallback'],['../classtvm_1_1auto__scheduler_1_1MeasureCallback.html',1,'tvm::auto_scheduler::MeasureCallback']]],
+  ['measurecallback',['MeasureCallback',['../classtvm_1_1auto__scheduler_1_1MeasureCallback.html',1,'tvm::auto_scheduler::MeasureCallback'],['../classtvm_1_1meta__schedule_1_1MeasureCallback.html',1,'tvm::meta_schedule::MeasureCallback']]],
   ['measurecallbacknode',['MeasureCallbackNode',['../classtvm_1_1meta__schedule_1_1MeasureCallbackNode.html',1,'tvm::meta_schedule::MeasureCallbackNode'],['../classtvm_1_1auto__scheduler_1_1MeasureCallbackNode.html',1,'tvm::auto_scheduler::MeasureCallbackNode']]],
   ['measurecandidate',['MeasureCandidate',['../classtvm_1_1meta__schedule_1_1MeasureCandidate.html',1,'tvm::meta_schedule']]],
   ['measurecandidatenode',['MeasureCandidateNode',['../classtvm_1_1meta__schedule_1_1MeasureCandidateNode.html',1,'tvm::meta_schedule']]],
diff --git a/docs/reference/api/doxygen/search/functions_10.js b/docs/reference/api/doxygen/search/functions_10.js
index 5325cf526..f6c6481f0 100644
--- a/docs/reference/api/doxygen/search/functions_10.js
+++ b/docs/reference/api/doxygen/search/functions_10.js
@@ -7,9 +7,9 @@ var searchData=
   ['packetdone',['PacketDone',['../classtvm_1_1runtime_1_1micro__rpc_1_1WriteStream.html#a1745b7d9d5a0e094e129eb7a4c363ac9',1,'tvm::runtime::micro_rpc::WriteStream']]],
   ['packimportstoc',['PackImportsToC',['../namespacetvm_1_1codegen.html#abf02059ebadcdb8bbbe5c840b646d67b',1,'tvm::codegen']]],
   ['packimportstollvm',['PackImportsToLLVM',['../namespacetvm_1_1codegen.html#ab2cd2a65bac4b26427a8ca0abe4e0bd6',1,'tvm::codegen']]],
-  ['pad',['pad',['../namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5',1,'tvm::topi::pad(const tvm::te::Tensor &amp;t, const tvm::Array&lt; tvm::PrimExpr &gt; &amp;pad_before, tvm::Array&lt; tvm::PrimExpr &gt; pad_after=tvm::Array&lt; tvm::PrimExpr &gt;(), PrimExpr pad_value=PrimExpr(), std::string name=&quot;T_pad&quot;, std::string tag=kElementWise, std::string pad_mode=&quot;constant&quot;, const Array&lt; PrimExpr &gt; *dyn_output_shape=nullptr)'],['../namespacetvm_1_1topi [...]
+  ['pad',['Pad',['../namespacetvm_1_1topi.html#a97c798d0a0ec20a95d351618b83d5121',1,'tvm::topi::Pad(const Array&lt; PrimExpr &gt; shape, int odim)'],['../namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5',1,'tvm::topi::pad(const tvm::te::Tensor &amp;t, const tvm::Array&lt; tvm::PrimExpr &gt; &amp;pad_before, tvm::Array&lt; tvm::PrimExpr &gt; pad_after=tvm::Array&lt; tvm::PrimExpr &gt;(), PrimExpr pad_value=PrimExpr(), std::string name=&quot;T_pad&quot;, std::string tag=kElement [...]
   ['pagememorymanagercreate',['PageMemoryManagerCreate',['../page__allocator_8h.html#a720dbc7474ac13b93fafb974cfc20bc7',1,'page_allocator.h']]],
-  ['parallel',['Parallel',['../classtvm_1_1tir_1_1ScheduleNode.html#a553dc17c0b49b175cd16881c81b6c789',1,'tvm::tir::ScheduleNode::Parallel()'],['../classtvm_1_1auto__scheduler_1_1State.html#a2376f0180bc5b5dd4b456f2a75d4a366',1,'tvm::auto_scheduler::State::parallel()'],['../classtvm_1_1te_1_1Stage.html#a60a6be10a1a96cb594c1399efabafef3',1,'tvm::te::Stage::parallel()']]],
+  ['parallel',['parallel',['../classtvm_1_1auto__scheduler_1_1State.html#a2376f0180bc5b5dd4b456f2a75d4a366',1,'tvm::auto_scheduler::State::parallel()'],['../classtvm_1_1te_1_1Stage.html#a60a6be10a1a96cb594c1399efabafef3',1,'tvm::te::Stage::parallel()'],['../classtvm_1_1tir_1_1ScheduleNode.html#a553dc17c0b49b175cd16881c81b6c789',1,'tvm::tir::ScheduleNode::Parallel()']]],
   ['parallel_5ffor',['parallel_for',['../namespacetvm_1_1support.html#a8bf1225e8bb1db575578ca2d645fb23c',1,'tvm::support']]],
   ['parallel_5ffor_5fdynamic',['parallel_for_dynamic',['../namespacetvm_1_1support.html#afe4271363c794f1644ce7af5c2266530',1,'tvm::support']]],
   ['parallelizevectorizeunroll',['ParallelizeVectorizeUnroll',['../classtvm_1_1meta__schedule_1_1ScheduleRule.html#a0ef9b604081db7a8bf960f3fbfd3a804',1,'tvm::meta_schedule::ScheduleRule']]],
diff --git a/docs/reference/api/doxygen/search/functions_12.js b/docs/reference/api/doxygen/search/functions_12.js
index c9dc7f277..9a515c3d4 100644
--- a/docs/reference/api/doxygen/search/functions_12.js
+++ b/docs/reference/api/doxygen/search/functions_12.js
@@ -52,7 +52,7 @@ var searchData=
   ['rendererrors',['RenderErrors',['../classtvm_1_1ErrorReporter.html#a54699ec5f538bd207b5aa4e3f55181c6',1,'tvm::ErrorReporter']]],
   ['renewdefs',['RenewDefs',['../namespacetvm_1_1tir.html#a2e639c81d1c6875ead7764ab8a7cd553',1,'tvm::tir']]],
   ['renormalizesplitpattern',['RenormalizeSplitPattern',['../namespacetvm_1_1tir_1_1transform.html#a5c670c9efcd740f2f168b62e624c8c57',1,'tvm::tir::transform']]],
-  ['reorder',['reorder',['../classtvm_1_1auto__scheduler_1_1State.html#a16e95966b46977eff629a5f4f1564533',1,'tvm::auto_scheduler::State::reorder()'],['../classtvm_1_1te_1_1Stage.html#ad96cd240a92df9cafae89cdf2a7e302e',1,'tvm::te::Stage::reorder()'],['../classtvm_1_1tir_1_1ScheduleNode.html#a059229fe0e254961da406807a97f7a3d',1,'tvm::tir::ScheduleNode::Reorder()']]],
+  ['reorder',['Reorder',['../classtvm_1_1tir_1_1ScheduleNode.html#a059229fe0e254961da406807a97f7a3d',1,'tvm::tir::ScheduleNode::Reorder()'],['../classtvm_1_1auto__scheduler_1_1State.html#a16e95966b46977eff629a5f4f1564533',1,'tvm::auto_scheduler::State::reorder()'],['../classtvm_1_1te_1_1Stage.html#ad96cd240a92df9cafae89cdf2a7e302e',1,'tvm::te::Stage::reorder()']]],
   ['reorderstep',['ReorderStep',['../classtvm_1_1auto__scheduler_1_1ReorderStep.html#a83b9dab5f38d5a4d42c6424ba437bc10',1,'tvm::auto_scheduler::ReorderStep::ReorderStep(int stage_id, const Array&lt; Integer &gt; &amp;after_ids)'],['../classtvm_1_1auto__scheduler_1_1ReorderStep.html#a9586534afef3e0f57ab31e8374e70792',1,'tvm::auto_scheduler::ReorderStep::ReorderStep(dmlc::JSONReader *reader)']]],
   ['reorg',['reorg',['../namespacetvm_1_1topi_1_1vision.html#a1014df582489005202c4218e51792314',1,'tvm::topi::vision']]],
   ['repeat',['repeat',['../namespacetvm_1_1topi.html#afe9f6d9103b2dfbc601bfd2304a4e687',1,'tvm::topi']]],
@@ -65,7 +65,7 @@ var searchData=
   ['reportat',['ReportAt',['../classtvm_1_1ErrorReporter.html#a3e1c300e60077c38bc9540dddcd1a019',1,'tvm::ErrorReporter::ReportAt(const GlobalVar &amp;global, const ObjectRef &amp;node, std::stringstream &amp;err)'],['../classtvm_1_1ErrorReporter.html#a04384ff3175673b4ff08fe46abca281c',1,'tvm::ErrorReporter::ReportAt(const GlobalVar &amp;global, const ObjectRef &amp;node, const CompileError &amp;err)']]],
   ['reprprinter',['ReprPrinter',['../classtvm_1_1ReprPrinter.html#a05b878a528f2dec33e28278b17ddeb6b',1,'tvm::ReprPrinter']]],
   ['reserve',['reserve',['../classtvm_1_1runtime_1_1Array.html#a1a7727b86efaf35c58a5198ab1c139c8',1,'tvm::runtime::Array']]],
-  ['reset',['reset',['../classtvm_1_1runtime_1_1NDArray.html#af2a8ccab95d432d1ecad7a389e11bcd3',1,'tvm::runtime::NDArray::reset()'],['../classtvm_1_1runtime_1_1ObjectPtr.html#ac4461465ba0e785794794e0405c96590',1,'tvm::runtime::ObjectPtr::reset()'],['../classtvm_1_1auto__scheduler_1_1ProgramMeasurerNode.html#a73b14ea360a9902c291d5bf6e97636cd',1,'tvm::auto_scheduler::ProgramMeasurerNode::Reset()'],['../classtvm_1_1runtime_1_1micro__rpc_1_1Unframer.html#ae6279154fe70e9eb85937b51e70a4bf8',1, [...]
+  ['reset',['Reset',['../classtvm_1_1auto__scheduler_1_1ProgramMeasurerNode.html#a73b14ea360a9902c291d5bf6e97636cd',1,'tvm::auto_scheduler::ProgramMeasurerNode::Reset()'],['../classtvm_1_1runtime_1_1micro__rpc_1_1Unframer.html#ae6279154fe70e9eb85937b51e70a4bf8',1,'tvm::runtime::micro_rpc::Unframer::Reset()'],['../classtvm_1_1runtime_1_1micro__rpc_1_1Framer.html#a44ff9650ecca8785e33c25c369d2570a',1,'tvm::runtime::micro_rpc::Framer::Reset()'],['../classtvm_1_1tir_1_1StmtSRefNode.html#a0a81 [...]
   ['reset_5fattr',['reset_attr',['../classtvm_1_1OpRegEntry.html#a67628f8d3d6dea5b0a47e462c06b7790',1,'tvm::OpRegEntry']]],
   ['resetthreadpool',['ResetThreadPool',['../namespacetvm_1_1runtime_1_1threading.html#aafdb21c00248ff146b614a7e888b4fd7',1,'tvm::runtime::threading']]],
   ['reshape',['reshape',['../namespacetvm_1_1topi.html#a3aad65f2505802109ba7d05359ce9005',1,'tvm::topi']]],
diff --git a/docs/reference/api/doxygen/search/functions_13.js b/docs/reference/api/doxygen/search/functions_13.js
index 191c41c08..c4f53dacc 100644
--- a/docs/reference/api/doxygen/search/functions_13.js
+++ b/docs/reference/api/doxygen/search/functions_13.js
@@ -73,6 +73,7 @@ var searchData=
   ['set_5fscope',['set_scope',['../classtvm_1_1te_1_1Stage.html#a0dc42f190125f0cf63e8d749ae66af7c',1,'tvm::te::Stage']]],
   ['set_5fstore_5fpredicate',['set_store_predicate',['../classtvm_1_1te_1_1Stage.html#a0056636228aed23f71eecc0810731436',1,'tvm::te::Stage']]],
   ['set_5fsupport_5flevel',['set_support_level',['../classtvm_1_1OpRegEntry.html#ab4f7e0f99c8acf2153e15f7cbb6c3c97',1,'tvm::OpRegEntry']]],
+  ['set_5ftarget_5fparser',['set_target_parser',['../classtvm_1_1TargetKindRegEntry.html#a21152c83f61180dcb6293226a98025a8',1,'tvm::TargetKindRegEntry']]],
   ['set_5fupper_5fbound',['set_upper_bound',['../structtvm_1_1detail_1_1AttrNopEntry.html#add2843b725ee43be26672a8d2d641cce',1,'tvm::detail::AttrNopEntry::set_upper_bound()'],['../structtvm_1_1detail_1_1AttrInitEntry.html#a69e876dfc10eed9573c3043ea5ef2013',1,'tvm::detail::AttrInitEntry::set_upper_bound()'],['../classtvm_1_1detail_1_1AttrDocEntry.html#aec039b071d826ab164c5abe123aefaa3',1,'tvm::detail::AttrDocEntry::set_upper_bound()'],['../structtvm_1_1detail_1_1AttrTriggerNonDefaultEntry [...]
   ['setaxisseparator',['SetAxisSeparator',['../classtvm_1_1tir_1_1ScheduleNode.html#a025b5eef0c2516fc1f72eed9ced88807',1,'tvm::tir::ScheduleNode']]],
   ['setcomputeatiter',['SetComputeAtIter',['../classtvm_1_1auto__scheduler_1_1AttachMap.html#af29900c08d7d6a108bb79eaf8325e8a1',1,'tvm::auto_scheduler::AttachMap']]],
@@ -126,7 +127,7 @@ var searchData=
   ['singlepoint',['SinglePoint',['../classtvm_1_1arith_1_1IntSet.html#a58aeb0d34656b1b43ac2532e4dfa12ed',1,'tvm::arith::IntSet']]],
   ['singleton',['Singleton',['../classtvm_1_1te_1_1Singleton.html#a94450b853dcd5e9865546d8c8fe351a1',1,'tvm::te::Singleton']]],
   ['sinh',['sinh',['../namespacetvm.html#ad828bc801c73df761c58d9f8877d52ee',1,'tvm::sinh()'],['../namespacetvm_1_1topi.html#af9694f5470ba2cabc19866be3b00fe8d',1,'tvm::topi::sinh()']]],
-  ['size',['size',['../classtvm_1_1runtime_1_1ADT.html#af51613add20f67643684b1c7fdd5569a',1,'tvm::runtime::ADT::size()'],['../classtvm_1_1runtime_1_1ArrayNode.html#a3e88cee6eb31d0e495f7debd94b7573d',1,'tvm::runtime::ArrayNode::size()'],['../classtvm_1_1runtime_1_1Array.html#aed6387e67d18b9d5ad18f510fd600a25',1,'tvm::runtime::Array::size()'],['../classtvm_1_1runtime_1_1MapNode.html#a5c0c770f7667f911aa8bec879e3ac214',1,'tvm::runtime::MapNode::size()'],['../classtvm_1_1runtime_1_1Map.html#a [...]
+  ['size',['Size',['../classtvm_1_1TensorTypeNode.html#a1f08dac86ae8aea81d058ef64cfd38b4',1,'tvm::TensorTypeNode::Size()'],['../classtvm_1_1meta__schedule_1_1DatabaseNode.html#aae5b9ab9f7e497654b90c23a2159a5cc',1,'tvm::meta_schedule::DatabaseNode::Size()'],['../classtvm_1_1meta__schedule_1_1PyDatabaseNode.html#a36817d04978253571fef7d01427ce9c0',1,'tvm::meta_schedule::PyDatabaseNode::Size()'],['../classtvm_1_1runtime_1_1micro__rpc_1_1FrameBuffer.html#ae395a0f1c6e79e825aa7a244c74a5d7b',1,' [...]
   ['sizevar',['SizeVar',['../classtvm_1_1tir_1_1SizeVar.html#ac470249315d9e395ad581d35dd5dcb05',1,'tvm::tir::SizeVar::SizeVar(ObjectPtr&lt; Object &gt; n)'],['../classtvm_1_1tir_1_1SizeVar.html#a0f8cb8a92feb96343939d223db90f7cd',1,'tvm::tir::SizeVar::SizeVar(String name_hint=&quot;s&quot;, DataType t=DataType::Int(32), Span span=Span())']]],
   ['skipassert',['SkipAssert',['../namespacetvm_1_1tir_1_1transform.html#a6fdd5910b00af823071dcdddd21cd2d3',1,'tvm::tir::transform']]],
   ['slice',['Slice',['../classtvm_1_1te_1_1Tensor_1_1Slice.html#ab314819e8bcca6421e9a4f33e48578c3',1,'tvm::te::Tensor::Slice']]],
@@ -146,7 +147,7 @@ var searchData=
   ['sparse_5fto_5fdense',['sparse_to_dense',['../namespacetvm_1_1topi.html#a877e6fdffb6b6c051c29602ec6fe995c',1,'tvm::topi']]],
   ['specialize',['Specialize',['../namespacetvm_1_1tir.html#a69b6f1b0014dc6e7dd390cff746e9782',1,'tvm::tir']]],
   ['specializedcondition',['SpecializedCondition',['../classtvm_1_1te_1_1SpecializedCondition.html#a48d119ee1c6033929a5592cfc2592e60',1,'tvm::te::SpecializedCondition']]],
-  ['split',['split',['../classtvm_1_1auto__scheduler_1_1State.html#a5815f21fc90ba7cc379c2410c05ab54c',1,'tvm::auto_scheduler::State::split()'],['../classtvm_1_1te_1_1Stage.html#a5a7cd562be59b68a187ad97085a3425d',1,'tvm::te::Stage::split()'],['../classtvm_1_1te_1_1Split.html#a328e0c093ce5b41ebaf33e0e80592764',1,'tvm::te::Split::Split()'],['../classtvm_1_1tir_1_1Layout.html#ad7657af7789fe040d3224c0149976bb4',1,'tvm::tir::Layout::Split()'],['../classtvm_1_1tir_1_1ScheduleNode.html#ac190a0ab [...]
+  ['split',['Split',['../classtvm_1_1te_1_1Split.html#a328e0c093ce5b41ebaf33e0e80592764',1,'tvm::te::Split::Split()'],['../classtvm_1_1tir_1_1Layout.html#ad7657af7789fe040d3224c0149976bb4',1,'tvm::tir::Layout::Split()'],['../classtvm_1_1tir_1_1ScheduleNode.html#ac190a0ab76d8754a35209479bcc6dfa2',1,'tvm::tir::ScheduleNode::Split()'],['../classtvm_1_1auto__scheduler_1_1State.html#a5815f21fc90ba7cc379c2410c05ab54c',1,'tvm::auto_scheduler::State::split()'],['../classtvm_1_1te_1_1Stage.html#a [...]
   ['split_5fby_5fnparts',['split_by_nparts',['../classtvm_1_1te_1_1Stage.html#a51432f38d9ec4792a2525023179ae604',1,'tvm::te::Stage']]],
   ['split_5fsections',['split_sections',['../namespacetvm_1_1topi.html#acc643e2ed166fa2ed82a95853e145619',1,'tvm::topi']]],
   ['splitargs',['SplitArgs',['../namespacetvm_1_1relay_1_1transform.html#a2425d757b896168a109498e8d34ba960',1,'tvm::relay::transform']]],
@@ -185,7 +186,7 @@ var searchData=
   ['storageflatten',['StorageFlatten',['../namespacetvm_1_1tir_1_1transform.html#a778d3e1efecdff97e7bcf0e6a5406e61',1,'tvm::tir::transform']]],
   ['storagerewrite',['StorageRewrite',['../namespacetvm_1_1tir_1_1transform.html#abe87b271e2c20e0ad901697f33c01d2c',1,'tvm::tir::transform']]],
   ['store',['Store',['../classtvm_1_1tir_1_1Store.html#a2c4278b8bcdae57ada2022ecc7c290c3',1,'tvm::tir::Store']]],
-  ['str',['str',['../classtvm_1_1TargetNode.html#a30cd67db46a9c4b098a8ba38fff22e26',1,'tvm::TargetNode::str()'],['../classtvm_1_1script_1_1printer_1_1LiteralDoc.html#a789d7d73bd4d94612fa2a84c16b26b89',1,'tvm::script::printer::LiteralDoc::Str()']]],
+  ['str',['Str',['../classtvm_1_1script_1_1printer_1_1LiteralDoc.html#a789d7d73bd4d94612fa2a84c16b26b89',1,'tvm::script::printer::LiteralDoc::Str()'],['../classtvm_1_1TargetNode.html#a30cd67db46a9c4b098a8ba38fff22e26',1,'tvm::TargetNode::str()']]],
   ['str2set',['Str2Set',['../namespacetvm_1_1topi.html#af01f6cc6b977801126083f0faffe252b',1,'tvm::topi']]],
   ['streamsync',['StreamSync',['../classtvm_1_1runtime_1_1DeviceAPI.html#ac29b9295c432a87658392872c644864f',1,'tvm::runtime::DeviceAPI']]],
   ['strided_5fslice',['strided_slice',['../namespacetvm_1_1topi.html#a208e90d4a8db8cf2c7d77b4460f7df70',1,'tvm::topi']]],
diff --git a/docs/reference/api/doxygen/search/functions_15.js b/docs/reference/api/doxygen/search/functions_15.js
index f4efdcec0..b2a53cff2 100644
--- a/docs/reference/api/doxygen/search/functions_15.js
+++ b/docs/reference/api/doxygen/search/functions_15.js
@@ -12,7 +12,7 @@ var searchData=
   ['unionlowerbound',['UnionLowerBound',['../namespacetvm_1_1arith.html#ab22d7fd95abb5fa372843a40e19d80c5',1,'tvm::arith']]],
   ['unionregion',['UnionRegion',['../namespacetvm_1_1arith.html#ad27c4f216e41eb8e81296fb7ec4b9453',1,'tvm::arith']]],
   ['unionregionlowerbound',['UnionRegionLowerBound',['../namespacetvm_1_1arith.html#a4c3dedfa4cba4ad39c953eb51eb83e4d',1,'tvm::arith']]],
-  ['unique',['unique',['../classtvm_1_1runtime_1_1Object.html#afd548730a6139d19fe24473ad66026d7',1,'tvm::runtime::Object::unique()'],['../classtvm_1_1runtime_1_1ObjectPtr.html#af95c6c6fcd89da0f62b93f1167b72314',1,'tvm::runtime::ObjectPtr::unique()'],['../classtvm_1_1runtime_1_1ObjectRef.html#a4e7cdb1574b93a59e784d70aa47b8da7',1,'tvm::runtime::ObjectRef::unique()'],['../classtvm_1_1VirtualDeviceCache.html#a25ba1351484aa58a2cc7cef8f8e4423c',1,'tvm::VirtualDeviceCache::Unique()']]],
+  ['unique',['Unique',['../classtvm_1_1VirtualDeviceCache.html#a25ba1351484aa58a2cc7cef8f8e4423c',1,'tvm::VirtualDeviceCache::Unique()'],['../classtvm_1_1runtime_1_1Object.html#afd548730a6139d19fe24473ad66026d7',1,'tvm::runtime::Object::unique()'],['../classtvm_1_1runtime_1_1ObjectPtr.html#af95c6c6fcd89da0f62b93f1167b72314',1,'tvm::runtime::ObjectPtr::unique()'],['../classtvm_1_1runtime_1_1ObjectRef.html#a4e7cdb1574b93a59e784d70aa47b8da7',1,'tvm::runtime::ObjectRef::unique()']]],
   ['unknownattributeaccesspathnode',['UnknownAttributeAccessPathNode',['../classtvm_1_1UnknownAttributeAccessPathNode.html#a1882e9e591466a2785acc761dc63d56e',1,'tvm::UnknownAttributeAccessPathNode']]],
   ['unmatchedcases',['UnmatchedCases',['../namespacetvm_1_1relay.html#aa3a8cace40f8056fd6412f39c3eaa605',1,'tvm::relay']]],
   ['unravel_5findex',['unravel_index',['../namespacetvm_1_1topi.html#a8811a02532bbe3047986bf1a8449ac0e',1,'tvm::topi']]],
diff --git a/docs/reference/api/doxygen/search/functions_16.js b/docs/reference/api/doxygen/search/functions_16.js
index afe90b11a..59fa5b46a 100644
--- a/docs/reference/api/doxygen/search/functions_16.js
+++ b/docs/reference/api/doxygen/search/functions_16.js
@@ -8,7 +8,7 @@ var searchData=
   ['vector',['Vector',['../classtvm_1_1arith_1_1IntSet.html#a29b6f1e60f4b328fcfabc514e0c10f17',1,'tvm::arith::IntSet']]],
   ['vectorcombine',['vectorcombine',['../namespacetvm_1_1tir_1_1builtin.html#a30dff65bc2c142b57fae7f60e378ff43',1,'tvm::tir::builtin']]],
   ['vectorhigh',['vectorhigh',['../namespacetvm_1_1tir_1_1builtin.html#a45bf65ca7ca01d2016e0b609117d7e25',1,'tvm::tir::builtin']]],
-  ['vectorize',['Vectorize',['../classtvm_1_1tir_1_1ScheduleNode.html#ab4a8cd91959ceab22855ec338978bcee',1,'tvm::tir::ScheduleNode::Vectorize()'],['../classtvm_1_1auto__scheduler_1_1State.html#a97b8a21210d63bea241dbab085d89b53',1,'tvm::auto_scheduler::State::vectorize()'],['../classtvm_1_1te_1_1Stage.html#a44d33e3920106e75dc7c68272f880812',1,'tvm::te::Stage::vectorize()']]],
+  ['vectorize',['vectorize',['../classtvm_1_1auto__scheduler_1_1State.html#a97b8a21210d63bea241dbab085d89b53',1,'tvm::auto_scheduler::State::vectorize()'],['../classtvm_1_1te_1_1Stage.html#a44d33e3920106e75dc7c68272f880812',1,'tvm::te::Stage::vectorize()'],['../classtvm_1_1tir_1_1ScheduleNode.html#ab4a8cd91959ceab22855ec338978bcee',1,'tvm::tir::ScheduleNode::Vectorize()']]],
   ['vectorizeloop',['VectorizeLoop',['../namespacetvm_1_1tir_1_1transform.html#af3cecb50a8b8fc8021f6a87bc27587da',1,'tvm::tir::transform']]],
   ['vectorjacobianproduct',['VectorJacobianProduct',['../namespacetvm_1_1te.html#a547183f5a311af53ab598faba423fd64',1,'tvm::te']]],
   ['vectorlow',['vectorlow',['../namespacetvm_1_1tir_1_1builtin.html#a7ed64a9fb0a7f575fc63e1e0395e96a6',1,'tvm::tir::builtin']]],
diff --git a/docs/reference/api/doxygen/search/functions_c.js b/docs/reference/api/doxygen/search/functions_c.js
index a40448b38..7323ecdb6 100644
--- a/docs/reference/api/doxygen/search/functions_c.js
+++ b/docs/reference/api/doxygen/search/functions_c.js
@@ -14,7 +14,7 @@ var searchData=
   ['left_5fshift',['left_shift',['../namespacetvm.html#ad4fceb4266c6e7644fa373eacf73359f',1,'tvm::left_shift(PrimExpr a, PrimExpr b, Span span=Span())'],['../namespacetvm.html#a6bc108896d74f5f3b5cc3b98e9780e1c',1,'tvm::left_shift(const PrimExpr &amp;a, int b, Span span=Span())'],['../namespacetvm.html#a58fbf68a58a7f32935d6c4539d292a08',1,'tvm::left_shift(int a, const PrimExpr &amp;b, Span span=Span())'],['../namespacetvm_1_1topi.html#a46057526edd4bbd0a291edf7f0c863b4',1,'tvm::topi::left_ [...]
   ['legalize',['Legalize',['../namespacetvm_1_1relay_1_1qnn_1_1transform.html#a2323e3c38cc9ae1626cd98295b83e906',1,'tvm::relay::qnn::transform::Legalize()'],['../namespacetvm_1_1relay_1_1transform.html#aae623a28eda64b60c6ee90edde103891',1,'tvm::relay::transform::Legalize()']]],
   ['legalizepackedcalls',['LegalizePackedCalls',['../namespacetvm_1_1tir_1_1transform.html#acc84aad38c2cacfae333c8970ece933d',1,'tvm::tir::transform']]],
-  ['length',['Length',['../classtvm_1_1ObjectPathNode.html#a7fb8c3f4a9510e3893cb04bd045f93db',1,'tvm::ObjectPathNode::Length()'],['../classtvm_1_1runtime_1_1String.html#a812045bfefb0f80f850d1b39f0e40489',1,'tvm::runtime::String::length()']]],
+  ['length',['length',['../classtvm_1_1runtime_1_1String.html#a812045bfefb0f80f850d1b39f0e40489',1,'tvm::runtime::String::length()'],['../classtvm_1_1ObjectPathNode.html#a7fb8c3f4a9510e3893cb04bd045f93db',1,'tvm::ObjectPathNode::Length()']]],
   ['less',['less',['../namespacetvm.html#a52fa1dc57423a077eb098960162e7b85',1,'tvm::less(PrimExpr a, PrimExpr b, Span span=Span())'],['../namespacetvm.html#ab69bb15adb82d166504b9cbeef21a4dc',1,'tvm::less(const PrimExpr &amp;a, float b, Span span=Span())'],['../namespacetvm.html#a0d5dc442dde0e69657c40803d394ea73',1,'tvm::less(float a, const PrimExpr &amp;b, Span span=Span())'],['../namespacetvm.html#a042ab9c595a8a0f63c07dbbdf75ecf9c',1,'tvm::less(int a, const PrimExpr &amp;b, Span span=Sp [...]
   ['less_5fequal',['less_equal',['../namespacetvm.html#a6dfe80d16a7b4f551c87a8901d366d08',1,'tvm::less_equal(PrimExpr a, PrimExpr b, Span span=Span())'],['../namespacetvm.html#aec0ac319177760ff01be833bae8b72bf',1,'tvm::less_equal(const PrimExpr &amp;a, float b, Span span=Span())'],['../namespacetvm.html#a5cee73ced0a40ed261dc3beec9f8247c',1,'tvm::less_equal(float a, const PrimExpr &amp;b, Span span=Span())'],['../namespacetvm.html#ad4734f467b4107f0da21a510788479c1',1,'tvm::less_equal(int  [...]
   ['let',['Let',['../classtvm_1_1relay_1_1Let.html#aa759088bca68feaf39b59681f71c8cad',1,'tvm::relay::Let::Let()'],['../classtvm_1_1tir_1_1Let.html#ae5b14cbd441631e22f090d462098cb27',1,'tvm::tir::Let::Let()']]],
diff --git a/docs/reference/api/doxygen/search/typedefs_5.js b/docs/reference/api/doxygen/search/typedefs_5.js
index 877751cc7..a0e9c8714 100644
--- a/docs/reference/api/doxygen/search/typedefs_5.js
+++ b/docs/reference/api/doxygen/search/typedefs_5.js
@@ -63,6 +63,7 @@ var searchData=
   ['ftvmrelaytotir',['FTVMRelayToTIR',['../namespacetvm.html#a3b1103f53a837ff14dc583e1c0b6b898',1,'tvm']]],
   ['ftvmschedule',['FTVMSchedule',['../namespacetvm_1_1relay.html#aff3d3515294c3128bb63a75c5d30f596',1,'tvm::relay']]],
   ['ftvmstrategy',['FTVMStrategy',['../namespacetvm_1_1relay.html#a2eb2ad4e7a83e0a28e2ad073d7bf9305',1,'tvm::relay']]],
+  ['ftvmtargetparser',['FTVMTargetParser',['../namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3',1,'tvm']]],
   ['ftvmtirtoruntime',['FTVMTIRToRuntime',['../namespacetvm.html#ade89c3d682d83c2e14ec9337084541e3',1,'tvm']]],
   ['ftype',['FType',['../classtvm_1_1ReprPrinter.html#aee843a9a8b4885f5797843bed6467415',1,'tvm::ReprPrinter']]],
   ['functype',['FuncType',['../namespacetvm_1_1relay.html#aa475de90506d48a1bb04ef6d1bd99bfb',1,'tvm::relay']]],
diff --git a/docs/reference/api/doxygen/search/typedefs_e.js b/docs/reference/api/doxygen/search/typedefs_e.js
index ab26441cd..0b968c676 100644
--- a/docs/reference/api/doxygen/search/typedefs_e.js
+++ b/docs/reference/api/doxygen/search/typedefs_e.js
@@ -1,5 +1,6 @@
 var searchData=
 [
+  ['targetjson',['TargetJSON',['../namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c',1,'tvm']]],
   ['tcalleffectkind',['TCallEffectKind',['../namespacetvm_1_1tir.html#a651e82a046f157a7d286b0985b3edb84',1,'tvm::tir']]],
   ['tensortype',['TensorType',['../namespacetvm_1_1relay.html#a52c13723bba53f4953dfd10c34d480f8',1,'tvm::relay']]],
   ['tensortypenode',['TensorTypeNode',['../namespacetvm_1_1relay.html#acfeff91bc774a278a2e9f6103a04387d',1,'tvm::relay']]],
diff --git a/docs/reference/api/doxygen/search/variables_12.js b/docs/reference/api/doxygen/search/variables_12.js
index e4adb4f91..93d214f4d 100644
--- a/docs/reference/api/doxygen/search/variables_12.js
+++ b/docs/reference/api/doxygen/search/variables_12.js
@@ -5,6 +5,7 @@ var searchData=
   ['target_5fburst_5fbytes',['target_burst_bytes',['../structtvm_1_1PoolInfoNode.html#a747c03e3eafc83b053637b735244c6d7',1,'tvm::PoolInfoNode::target_burst_bytes()'],['../structtvm_1_1PoolInfoPropertiesNode.html#aa1efe29e920f5b003894a2ae3304da17',1,'tvm::PoolInfoPropertiesNode::target_burst_bytes()']]],
   ['target_5fhost',['target_host',['../classtvm_1_1auto__scheduler_1_1SearchTaskNode.html#afe27bf8cb82dc8a1b6fffb9e5a3e6c20',1,'tvm::auto_scheduler::SearchTaskNode']]],
   ['target_5fiter_5fid',['target_iter_id',['../classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html#a5691967a42b989a54cf8c40c1627988e',1,'tvm::auto_scheduler::ComputeAtStepNode']]],
+  ['target_5fparser',['target_parser',['../classtvm_1_1TargetKindNode.html#a713525ca63d41aacadec9db01d28f59f',1,'tvm::TargetKindNode']]],
   ['target_5fshape',['target_shape',['../structtvm_1_1relay_1_1AffineGridAttrs.html#a3e8a722c28015e4fa002da324fc6d5b3',1,'tvm::relay::AffineGridAttrs']]],
   ['target_5fstage_5fid',['target_stage_id',['../classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html#ad3c69aebb4b821c8e975ce0c58dc8bbb',1,'tvm::auto_scheduler::ComputeAtStepNode']]],
   ['targets',['targets',['../structtvm_1_1PoolInfoNode.html#a45f1c1873709d0f38d2e9098b7e979cb',1,'tvm::PoolInfoNode']]],
diff --git a/docs/reference/api/doxygen/target_8h_source.html b/docs/reference/api/doxygen/target_8h_source.html
index c9ad9b3c4..1373d1736 100644
--- a/docs/reference/api/doxygen/target_8h_source.html
+++ b/docs/reference/api/doxygen/target_8h_source.html
@@ -85,7 +85,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></div><div class="ttdoc">base class of all object containers. </div><div class="ttdef"><b>Definition:</b> object.h:167</div></div>
 <div class="ttc" id="classtvm_1_1TargetNode_html_ac19a4ee0f0ec7ea607ec746bc4551b71"><div class="ttname"><a href="classtvm_1_1TargetNode.html#ac19a4ee0f0ec7ea607ec746bc4551b71">tvm::TargetNode::kind</a></div><div class="ttdeci">TargetKind kind</div><div class="ttdoc">The kind of the target device. </div><div class="ttdef"><b>Definition:</b> target.h:49</div></div>
 <div class="ttc" id="classtvm_1_1TargetNode_html_a3046260cd16b7b134fa99705b41d2aee"><div class="ttname"><a href="classtvm_1_1TargetNode.html#a3046260cd16b7b134fa99705b41d2aee">tvm::TargetNode::tag</a></div><div class="ttdeci">String tag</div><div class="ttdoc">Tag of the the target, can be empty. </div><div class="ttdef"><b>Definition:</b> target.h:53</div></div>
-<div class="ttc" id="classtvm_1_1TargetKind_html"><div class="ttname"><a href="classtvm_1_1TargetKind.html">tvm::TargetKind</a></div><div class="ttdoc">Managed reference class to TargetKindNode. </div><div class="ttdef"><b>Definition:</b> target_kind.h:128</div></div>
+<div class="ttc" id="classtvm_1_1TargetKind_html"><div class="ttname"><a href="classtvm_1_1TargetKind.html">tvm::TargetKind</a></div><div class="ttdoc">Managed reference class to TargetKindNode. </div><div class="ttdef"><b>Definition:</b> target_kind.h:140</div></div>
 <div class="ttc" id="classtvm_1_1AttrVisitor_html"><div class="ttname"><a href="classtvm_1_1AttrVisitor.html">tvm::AttrVisitor</a></div><div class="ttdoc">Visitor class to get the attributes of an AST/IR node. The content is going to be called for each fie...</div><div class="ttdef"><b>Definition:</b> reflection.h:52</div></div>
 <div class="ttc" id="target__kind_8h_html"><div class="ttname"><a href="target__kind_8h.html">target_kind.h</a></div><div class="ttdoc">Target kind registry. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
diff --git a/docs/reference/api/doxygen/target__kind_8h.html b/docs/reference/api/doxygen/target__kind_8h.html
index ec0739476..405d7606c 100644
--- a/docs/reference/api/doxygen/target__kind_8h.html
+++ b/docs/reference/api/doxygen/target__kind_8h.html
@@ -144,6 +144,11 @@ Macros</h2></td></tr>
 </table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="typedef-members"></a>
 Typedefs</h2></td></tr>
+<tr class="memitem:ad27a76489f3ede07b5d3f0dd3f97d93c"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c">tvm::TargetJSON</a> = Map&lt; String, ObjectRef &gt;</td></tr>
+<tr class="memdesc:ad27a76489f3ede07b5d3f0dd3f97d93c"><td class="mdescLeft">&#160;</td><td class="mdescRight">TargetParser to apply on instantiation of a given <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a>.  <a href="namespacetvm.html#ad27a76489f3ede07b5d3f0dd3f97d93c">More...</a><br /></td></tr>
+<tr class="separator:ad27a76489f3ede07b5d3f0dd3f97d93c"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a069e7a9aa20098c3406c6fbcf29092b3"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a069e7a9aa20098c3406c6fbcf29092b3">tvm::FTVMTargetParser</a> = TypedPackedFunc&lt; TargetJSON(TargetJSON)&gt;</td></tr>
+<tr class="separator:a069e7a9aa20098c3406c6fbcf29092b3"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a3b1103f53a837ff14dc583e1c0b6b898"><td class="memItemLeft" align="right" valign="top">using&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a3b1103f53a837ff14dc583e1c0b6b898">tvm::FTVMRelayToTIR</a> = transform::Pass</td></tr>
 <tr class="memdesc:a3b1103f53a837ff14dc583e1c0b6b898"><td class="mdescLeft">&#160;</td><td class="mdescRight">RelayToTIR <a class="el" href="classtvm_1_1transform_1_1Pass.html">tvm::transform::Pass</a> specific to a <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a>.  <a href="namespacetvm.html#a3b1103f53a837ff14dc583e1c0b6b898">More...</a><br /></td></tr>
 <tr class="separator:a3b1103f53a837ff14dc583e1c0b6b898"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -189,11 +194,11 @@ Variables</h2></td></tr>
       </table>
 </div><div class="memdoc">
 <b>Value:</b><div class="fragment"><div class="line"><a class="code" href="object_8h.html#a73bf3e57b9d7a6e0dd55d901321d01ed">TVM_STR_CONCAT</a>(<a class="code" href="target__kind_8h.html#a2341708a81fcee611c3c5a156596522c">TVM_TARGET_KIND_REGISTER_VAR_DEF</a>, __COUNTER__) = <a class="code" href="classtvm_1_1TargetKindRegEntry.html#a478c1bd27f0b8dd1b95c58808f8d0c70">\</a></div><div class="line"><a class="code" href="classtvm_1_1TargetKindRegEntry.html#a478c1bd27f0b8dd1b95c58808f8d0c70">   [...]
-<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_ae3ce5349493f402b82e755a0a180bd9a"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#ae3ce5349493f402b82e755a0a180bd9a">tvm::TargetKindRegEntry::set_device_type</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_device_type(int device_type)</div><div class="ttdoc">Set DLPack&amp;#39;s device_type the target. </div><div class="ttdef"><b>Definition:</b> target_kind.h:344</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a36f21402bccb03300478d6c85bd05512"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a36f21402bccb03300478d6c85bd05512">tvm::TargetKindRegEntry::set_name</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_name()</div><div class="ttdoc">Set name of the TargetKind to be the same as registry if it is empty. </div><div class="ttdef"><b>Definition:</b> target_kind.h:377</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_ae3ce5349493f402b82e755a0a180bd9a"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#ae3ce5349493f402b82e755a0a180bd9a">tvm::TargetKindRegEntry::set_device_type</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_device_type(int device_type)</div><div class="ttdoc">Set DLPack&amp;#39;s device_type the target. </div><div class="ttdef"><b>Definition:</b> target_kind.h:361</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a36f21402bccb03300478d6c85bd05512"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a36f21402bccb03300478d6c85bd05512">tvm::TargetKindRegEntry::set_name</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_name()</div><div class="ttdoc">Set name of the TargetKind to be the same as registry if it is empty. </div><div class="ttdef"><b>Definition:</b> target_kind.h:400</div></div>
 <div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a478c1bd27f0b8dd1b95c58808f8d0c70"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a478c1bd27f0b8dd1b95c58808f8d0c70">tvm::TargetKindRegEntry::RegisterOrGet</a></div><div class="ttdeci">static TargetKindRegEntry &amp; RegisterOrGet(const String &amp;target_kind_name)</div><div class="ttdoc">Register or get a new entry. </div></div>
-<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_accd2e15133cf6e6fe2703f57464eae89"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#accd2e15133cf6e6fe2703f57464eae89">tvm::TargetKindRegEntry::add_attr_option</a></div><div class="ttdeci">TargetKindRegEntry &amp; add_attr_option(const String &amp;key)</div><div class="ttdoc">Register a valid configuration option and its ValueType for validation. </div><div class="ttdef"><b>Definition:</b> target_kind.h:362</div></div>
-<div class="ttc" id="target__kind_8h_html_a2341708a81fcee611c3c5a156596522c"><div class="ttname"><a href="target__kind_8h.html#a2341708a81fcee611c3c5a156596522c">TVM_TARGET_KIND_REGISTER_VAR_DEF</a></div><div class="ttdeci">#define TVM_TARGET_KIND_REGISTER_VAR_DEF</div><div class="ttdef"><b>Definition:</b> target_kind.h:384</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_accd2e15133cf6e6fe2703f57464eae89"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#accd2e15133cf6e6fe2703f57464eae89">tvm::TargetKindRegEntry::add_attr_option</a></div><div class="ttdeci">TargetKindRegEntry &amp; add_attr_option(const String &amp;key)</div><div class="ttdoc">Register a valid configuration option and its ValueType for validation. </div><div class="ttdef"><b>Definition:</b> target_kind.h:385</div></div>
+<div class="ttc" id="target__kind_8h_html_a2341708a81fcee611c3c5a156596522c"><div class="ttname"><a href="target__kind_8h.html#a2341708a81fcee611c3c5a156596522c">TVM_TARGET_KIND_REGISTER_VAR_DEF</a></div><div class="ttdeci">#define TVM_TARGET_KIND_REGISTER_VAR_DEF</div><div class="ttdef"><b>Definition:</b> target_kind.h:407</div></div>
 </div><!-- fragment -->
 <p>Register a new target kind, or set attribute of the corresponding target kind. </p>
 <dl class="params"><dt>Parameters</dt><dd>
diff --git a/docs/reference/api/doxygen/target__kind_8h_source.html b/docs/reference/api/doxygen/target__kind_8h_source.html
index b99318f37..7806c46af 100644
--- a/docs/reference/api/doxygen/target__kind_8h_source.html
+++ b/docs/reference/api/doxygen/target__kind_8h_source.html
@@ -66,53 +66,55 @@ $(function() {
 <div class="title">target_kind.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="target__kind_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or mor [...]
+<a href="target__kind_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or mor [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMRetValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMRetValue.html">tvm::runtime::TVMRetValue</a></div><div class="ttdoc">Return Value container, Unlike TVMArgValue, which only holds reference and do not delete the underlyi...</div><div class="ttdef"><b>Definition:</b> packed_func.h:799</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_1_1attr_html_a7e4e7cd47471a9089022214d63d24206"><div class="ttname"><a href="namespacetvm_1_1tir_1_1attr.html#a7e4e7cd47471a9089022214d63d24206">tvm::tir::attr::device_type</a></div><div class="ttdeci">constexpr const char * device_type</div><div class="ttdoc">The device type. </div><div class="ttdef"><b>Definition:</b> stmt.h:1357</div></div>
-<div class="ttc" id="structtvm_1_1detail_1_1is__specialized_html"><div class="ttname"><a href="structtvm_1_1detail_1_1is__specialized.html">tvm::detail::is_specialized</a></div><div class="ttdef"><b>Definition:</b> target_kind.h:267</div></div>
-<div class="ttc" id="namespacetvm_1_1attr_html_a46a5e3d6985a91653fb43e94069cccd1"><div class="ttname"><a href="namespacetvm_1_1attr.html#a46a5e3d6985a91653fb43e94069cccd1">tvm::attr::kRelayToTIR</a></div><div class="ttdeci">constexpr const char * kRelayToTIR</div><div class="ttdoc">A TargetKind attribute of type FTVMRelayToTIR. If set, then the target kind name also corresponds to ...</div><div class="ttdef"><b>Definition:</b> target_kind.h:413</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a2995c32e12246e892f7f4cb621a2819c"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a2995c32e12246e892f7f4cb621a2819c">tvm::TargetKindRegEntry::set_default_keys</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_default_keys(std::vector&lt; String &gt; keys)</div><div class="ttdoc">Set DLPack&amp;#39;s device_type the target. </div><div class="ttdef"><b>Definition:</b> target_kind.h:349</div></div>
+<div class="ttc" id="structtvm_1_1detail_1_1is__specialized_html"><div class="ttname"><a href="structtvm_1_1detail_1_1is__specialized.html">tvm::detail::is_specialized</a></div><div class="ttdef"><b>Definition:</b> target_kind.h:284</div></div>
+<div class="ttc" id="namespacetvm_1_1attr_html_a46a5e3d6985a91653fb43e94069cccd1"><div class="ttname"><a href="namespacetvm_1_1attr.html#a46a5e3d6985a91653fb43e94069cccd1">tvm::attr::kRelayToTIR</a></div><div class="ttdeci">constexpr const char * kRelayToTIR</div><div class="ttdoc">A TargetKind attribute of type FTVMRelayToTIR. If set, then the target kind name also corresponds to ...</div><div class="ttdef"><b>Definition:</b> target_kind.h:436</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a2995c32e12246e892f7f4cb621a2819c"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a2995c32e12246e892f7f4cb621a2819c">tvm::TargetKindRegEntry::set_default_keys</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_default_keys(std::vector&lt; String &gt; keys)</div><div class="ttdoc">Set DLPack&amp;#39;s device_type the target. </div><div class="ttdef"><b>Definition:</b> target_kind.h:366</div></div>
 <div class="ttc" id="node_8h_html"><div class="ttname"><a href="node_8h.html">node.h</a></div><div class="ttdoc">Definitions and helper macros for IR/AST nodes. </div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindNode_html_a87b1530870f586aa78996f7449e445a6"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#a87b1530870f586aa78996f7449e445a6">tvm::TargetKindNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> target_kind.h:86</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a36f21402bccb03300478d6c85bd05512"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a36f21402bccb03300478d6c85bd05512">tvm::TargetKindRegEntry::set_name</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_name()</div><div class="ttdoc">Set name of the TargetKind to be the same as registry if it is empty. </div><div class="ttdef"><b>Definition:</b> target_kind.h:377</div></div>
-<div class="ttc" id="structtvm_1_1detail_1_1is__specialized_3_01Container_3_01Args_8_8_8_01_4_00_01Container_01_4_html_a8dee3a1604498d6bc64948f1c0d19dc2"><div class="ttname"><a href="structtvm_1_1detail_1_1is__specialized_3_01Container_3_01Args_8_8_8_01_4_00_01Container_01_4.html#a8dee3a1604498d6bc64948f1c0d19dc2">tvm::detail::is_specialized&lt; Container&lt; Args... &gt;, Container &gt;::type</a></div><div class="ttdeci">std::true_type type</div><div class="ttdef"><b>Definition:</b> tar [...]
+<div class="ttc" id="classtvm_1_1TargetKindNode_html_a87b1530870f586aa78996f7449e445a6"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#a87b1530870f586aa78996f7449e445a6">tvm::TargetKindNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> target_kind.h:98</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a36f21402bccb03300478d6c85bd05512"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a36f21402bccb03300478d6c85bd05512">tvm::TargetKindRegEntry::set_name</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_name()</div><div class="ttdoc">Set name of the TargetKind to be the same as registry if it is empty. </div><div class="ttdef"><b>Definition:</b> target_kind.h:400</div></div>
+<div class="ttc" id="structtvm_1_1detail_1_1is__specialized_3_01Container_3_01Args_8_8_8_01_4_00_01Container_01_4_html_a8dee3a1604498d6bc64948f1c0d19dc2"><div class="ttname"><a href="structtvm_1_1detail_1_1is__specialized_3_01Container_3_01Args_8_8_8_01_4_00_01Container_01_4.html#a8dee3a1604498d6bc64948f1c0d19dc2">tvm::detail::is_specialized&lt; Container&lt; Args... &gt;, Container &gt;::type</a></div><div class="ttdeci">std::true_type type</div><div class="ttdef"><b>Definition:</b> tar [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></div><div class="ttdoc">base class of all object containers. </div><div class="ttdef"><b>Definition:</b> object.h:167</div></div>
 <div class="ttc" id="classtvm_1_1AttrRegistry_html"><div class="ttname"><a href="classtvm_1_1AttrRegistry.html">tvm::AttrRegistry</a></div><div class="ttdef"><b>Definition:</b> executor.h:43</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_1_1transform_html_afa666ade112e9955059095d695238a9a"><div class="ttname"><a href="namespacetvm_1_1relay_1_1transform.html#afa666ade112e9955059095d695238a9a">tvm::relay::transform::Pass</a></div><div class="ttdeci">tvm::transform::Pass Pass</div><div class="ttdef"><b>Definition:</b> transform.h:43</div></div>
-<div class="ttc" id="classtvm_1_1TargetKind_html"><div class="ttname"><a href="classtvm_1_1TargetKind.html">tvm::TargetKind</a></div><div class="ttdoc">Managed reference class to TargetKindNode. </div><div class="ttdef"><b>Definition:</b> target_kind.h:128</div></div>
+<div class="ttc" id="classtvm_1_1TargetKind_html"><div class="ttname"><a href="classtvm_1_1TargetKind.html">tvm::TargetKind</a></div><div class="ttdoc">Managed reference class to TargetKindNode. </div><div class="ttdef"><b>Definition:</b> target_kind.h:140</div></div>
 <div class="ttc" id="classtvm_1_1AttrRegistryMapContainerMap_html"><div class="ttname"><a href="classtvm_1_1AttrRegistryMapContainerMap.html">tvm::AttrRegistryMapContainerMap</a></div><div class="ttdoc">Generic attribute map. </div><div class="ttdef"><b>Definition:</b> attr_registry_map.h:38</div></div>
 <div class="ttc" id="classtvm_1_1AttrVisitor_html"><div class="ttname"><a href="classtvm_1_1AttrVisitor.html">tvm::AttrVisitor</a></div><div class="ttdoc">Visitor class to get the attributes of an AST/IR node. The content is going to be called for each fie...</div><div class="ttdef"><b>Definition:</b> reflection.h:52</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_accd2e15133cf6e6fe2703f57464eae89"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#accd2e15133cf6e6fe2703f57464eae89">tvm::TargetKindRegEntry::add_attr_option</a></div><div class="ttdeci">TargetKindRegEntry &amp; add_attr_option(const String &amp;key)</div><div class="ttdoc">Register a valid configuration option and its ValueType for validation. </div><div class="ttdef"><b>Definition:</b> target_kind.h:362</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindAttrMap_html_abfd817be636e60822a33429ba30056bd"><div class="ttname"><a href="classtvm_1_1TargetKindAttrMap.html#abfd817be636e60822a33429ba30056bd">tvm::TargetKindAttrMap::TargetKindAttrMap</a></div><div class="ttdeci">TargetKindAttrMap(const AttrRegistryMapContainerMap&lt; TargetKind &gt; &amp;map)</div><div class="ttdef"><b>Definition:</b> target_kind.h:162</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_accd2e15133cf6e6fe2703f57464eae89"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#accd2e15133cf6e6fe2703f57464eae89">tvm::TargetKindRegEntry::add_attr_option</a></div><div class="ttdeci">TargetKindRegEntry &amp; add_attr_option(const String &amp;key)</div><div class="ttdoc">Register a valid configuration option and its ValueType for validation. </div><div class="ttdef"><b>Definition:</b> target_kind.h:385</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindAttrMap_html_abfd817be636e60822a33429ba30056bd"><div class="ttname"><a href="classtvm_1_1TargetKindAttrMap.html#abfd817be636e60822a33429ba30056bd">tvm::TargetKindAttrMap::TargetKindAttrMap</a></div><div class="ttdeci">TargetKindAttrMap(const AttrRegistryMapContainerMap&lt; TargetKind &gt; &amp;map)</div><div class="ttdef"><b>Definition:</b> target_kind.h:174</div></div>
 <div class="ttc" id="ir_2transform_8h_html"><div class="ttname"><a href="ir_2transform_8h.html">transform.h</a></div></div>
-<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html">tvm::TargetKindRegEntry</a></div><div class="ttdoc">Helper structure to register TargetKind. </div><div class="ttdef"><b>Definition:</b> target_kind.h:175</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html">tvm::TargetKindRegEntry</a></div><div class="ttdoc">Helper structure to register TargetKind. </div><div class="ttdef"><b>Definition:</b> target_kind.h:187</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindNode_html_a713525ca63d41aacadec9db01d28f59f"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#a713525ca63d41aacadec9db01d28f59f">tvm::TargetKindNode::target_parser</a></div><div class="ttdeci">FTVMTargetParser target_parser</div><div class="ttdoc">Function used to parse a JSON target during creation. </div><div class="ttdef"><b>Definition:</b> target_kind.h:96</div></div>
 <div class="ttc" id="classtvm_1_1transform_1_1Pass_html"><div class="ttname"><a href="classtvm_1_1transform_1_1Pass.html">tvm::transform::Pass</a></div><div class="ttdef"><b>Definition:</b> transform.h:363</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc.html">tvm::runtime::TypedPackedFunc</a></div><div class="ttdoc">Please refer to TypedPackedFunc&lt;R(Args..)&gt;. </div><div class="ttdef"><b>Definition:</b> packed_func.h:60</div></div>
-<div class="ttc" id="structtvm_1_1detail_1_1ValueTypeInfoMaker_html"><div class="ttname"><a href="structtvm_1_1detail_1_1ValueTypeInfoMaker.html">tvm::detail::ValueTypeInfoMaker</a></div><div class="ttdef"><b>Definition:</b> target_kind.h:66</div></div>
+<div class="ttc" id="structtvm_1_1detail_1_1ValueTypeInfoMaker_html"><div class="ttname"><a href="structtvm_1_1detail_1_1ValueTypeInfoMaker.html">tvm::detail::ValueTypeInfoMaker</a></div><div class="ttdef"><b>Definition:</b> target_kind.h:76</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a661d95f170bca230773914caeef3fe52"><div class="ttname"><a href="namespacetvm_1_1relay.html#a661d95f170bca230773914caeef3fe52">tvm::relay::Type</a></div><div class="ttdeci">tvm::Type Type</div><div class="ttdef"><b>Definition:</b> type.h:47</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindNode_html_aa62e049ba158730d9ab88e4c0b173de9"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#aa62e049ba158730d9ab88e4c0b173de9">tvm::TargetKindNode::default_keys</a></div><div class="ttdeci">Array&lt; String &gt; default_keys</div><div class="ttdoc">Default keys of the target. </div><div class="ttdef"><b>Definition:</b> target_kind.h:82</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindNode_html_aa62e049ba158730d9ab88e4c0b173de9"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#aa62e049ba158730d9ab88e4c0b173de9">tvm::TargetKindNode::default_keys</a></div><div class="ttdeci">Array&lt; String &gt; default_keys</div><div class="ttdoc">Default keys of the target. </div><div class="ttdef"><b>Definition:</b> target_kind.h:92</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html">tvm::runtime::ObjectRef</a></div><div class="ttdoc">Base class of all object reference. </div><div class="ttdef"><b>Definition:</b> object.h:511</div></div>
 <div class="ttc" id="classtvm_1_1AttrRegistryMap_html"><div class="ttname"><a href="classtvm_1_1AttrRegistryMap.html">tvm::AttrRegistryMap</a></div><div class="ttdoc">Map&lt;Key, ValueType&gt; used to store meta-data. </div><div class="ttdef"><b>Definition:</b> attr_registry_map.h:101</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindNode_html"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></div><div class="ttdoc">Target kind, specifies the kind of the target. </div><div class="ttdef"><b>Definition:</b> target_kind.h:75</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindNode_html"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html">tvm::TargetKindNode</a></div><div class="ttdoc">Target kind, specifies the kind of the target. </div><div class="ttdef"><b>Definition:</b> target_kind.h:85</div></div>
 <div class="ttc" id="attr__registry__map_8h_html"><div class="ttname"><a href="attr__registry__map_8h.html">attr_registry_map.h</a></div><div class="ttdoc">Attribute map used in registry. </div></div>
-<div class="ttc" id="namespacetvm_1_1attr_html_a17f834882ba3cd00890329433e8e81dd"><div class="ttname"><a href="namespacetvm_1_1attr.html#a17f834882ba3cd00890329433e8e81dd">tvm::attr::kIsExternalCodegen</a></div><div class="ttdeci">constexpr const char * kIsExternalCodegen</div><div class="ttdoc">A TargetKind attribute of type Bool. If true, then the target kind name also corresponds to an extern...</div><div class="ttdef"><b>Definition:</b> target_kind.h:404</div></div>
+<div class="ttc" id="namespacetvm_1_1attr_html_a17f834882ba3cd00890329433e8e81dd"><div class="ttname"><a href="namespacetvm_1_1attr.html#a17f834882ba3cd00890329433e8e81dd">tvm::attr::kIsExternalCodegen</a></div><div class="ttdeci">constexpr const char * kIsExternalCodegen</div><div class="ttdoc">A TargetKind attribute of type Bool. If true, then the target kind name also corresponds to an extern...</div><div class="ttdef"><b>Definition:</b> target_kind.h:427</div></div>
 <div class="ttc" id="object_8h_html_a3aea9b3f65aeb9150c0fa7800e5573c6"><div class="ttname"><a href="object_8h.html#a3aea9b3f65aeb9150c0fa7800e5573c6">TVM_DECLARE_FINAL_OBJECT_INFO</a></div><div class="ttdeci">#define TVM_DECLARE_FINAL_OBJECT_INFO(TypeName, ParentType)</div><div class="ttdoc">helper macro to declare type information in a final class. </div><div class="ttdef"><b>Definition:</b> object.h:671</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindNode_html_a18459286d8d501892992a4209ad08652"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#a18459286d8d501892992a4209ad08652">tvm::TargetKindNode::device_type</a></div><div class="ttdeci">int device_type</div><div class="ttdoc">Device type of target kind. </div><div class="ttdef"><b>Definition:</b> target_kind.h:80</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a4fa4f8e5fa280ddf3dc71310afd467a5"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a4fa4f8e5fa280ddf3dc71310afd467a5">tvm::TargetKindRegEntry::set_attr</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_attr(const String &amp;attr_name, const ValueType &amp;value, int plevel=10)</div><div class="ttdoc">Register additional attributes to target_kind. </div><div class="ttdef"><b>Definition:</b> target_kind.h:33 [...]
-<div class="ttc" id="structtvm_1_1detail_1_1is__specialized_html_a3ea7783c457d7ddc82100674292724f4"><div class="ttname"><a href="structtvm_1_1detail_1_1is__specialized.html#a3ea7783c457d7ddc82100674292724f4">tvm::detail::is_specialized::type</a></div><div class="ttdeci">std::false_type type</div><div class="ttdef"><b>Definition:</b> target_kind.h:268</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindNode_html_a18459286d8d501892992a4209ad08652"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#a18459286d8d501892992a4209ad08652">tvm::TargetKindNode::device_type</a></div><div class="ttdeci">int device_type</div><div class="ttdoc">Device type of target kind. </div><div class="ttdef"><b>Definition:</b> target_kind.h:90</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a4fa4f8e5fa280ddf3dc71310afd467a5"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a4fa4f8e5fa280ddf3dc71310afd467a5">tvm::TargetKindRegEntry::set_attr</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_attr(const String &amp;attr_name, const ValueType &amp;value, int plevel=10)</div><div class="ttdoc">Register additional attributes to target_kind. </div><div class="ttdef"><b>Definition:</b> target_kind.h:35 [...]
+<div class="ttc" id="structtvm_1_1detail_1_1is__specialized_html_a3ea7783c457d7ddc82100674292724f4"><div class="ttname"><a href="structtvm_1_1detail_1_1is__specialized.html#a3ea7783c457d7ddc82100674292724f4">tvm::detail::is_specialized::type</a></div><div class="ttdeci">std::false_type type</div><div class="ttdef"><b>Definition:</b> target_kind.h:285</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Map_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Map.html">tvm::runtime::Map</a></div><div class="ttdoc">Map container of NodeRef-&gt;NodeRef in DSL graph. Map implements copy on write semantics, which means map is mutable but copy will happen when array is referenced in more than two places. </div><div class="ttdef"><b>Definition:</b> map.h:1268</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_a817ba6c23b7ee1821c48a75edf255a30"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#a817ba6c23b7ee1821c48a75edf255a30">tvm::runtime::Object::TypeIndex2Key</a></div><div class="ttdeci">static std::string TypeIndex2Key(uint32_t tindex)</div><div class="ttdoc">Get the type key of the corresponding index from runtime. </div></div>
-<div class="ttc" id="classtvm_1_1TargetKindNode_html_a47f02c66d0f972befdfb29ec592ecba0"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#a47f02c66d0f972befdfb29ec592ecba0">tvm::TargetKindNode::preprocessor</a></div><div class="ttdeci">PackedFunc preprocessor</div><div class="ttdoc">Function used to preprocess on target creation. </div><div class="ttdef"><b>Definition:</b> target_kind.h:84</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindNode_html_a47f02c66d0f972befdfb29ec592ecba0"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#a47f02c66d0f972befdfb29ec592ecba0">tvm::TargetKindNode::preprocessor</a></div><div class="ttdeci">PackedFunc preprocessor</div><div class="ttdoc">Function used to preprocess on target creation. </div><div class="ttdef"><b>Definition:</b> target_kind.h:94</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1PackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1PackedFunc.html">tvm::runtime::PackedFunc</a></div><div class="ttdoc">Packed function is a type-erased function. The arguments are passed by packed format. </div><div class="ttdef"><b>Definition:</b> packed_func.h:138</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Optional_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Optional.html">tvm::runtime::Optional</a></div><div class="ttdoc">Optional container that to represent to a Nullable variant of T. </div><div class="ttdef"><b>Definition:</b> optional.h:51</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindAttrMap_html"><div class="ttname"><a href="classtvm_1_1TargetKindAttrMap.html">tvm::TargetKindAttrMap</a></div><div class="ttdoc">Map&lt;TargetKind, ValueType&gt; used to store meta-information about TargetKind. </div><div class="ttdef"><b>Definition:</b> target_kind.h:72</div></div>
-<div class="ttc" id="classtvm_1_1TargetKind_html_ae3c4bff01e4c03982e4b92b3352c6532"><div class="ttname"><a href="classtvm_1_1TargetKind.html#ae3c4bff01e4c03982e4b92b3352c6532">tvm::TargetKind::GetAttrMap</a></div><div class="ttdeci">static TargetKindAttrMap&lt; ValueType &gt; GetAttrMap(const String &amp;attr_name)</div><div class="ttdoc">Get the attribute map given the attribute name. </div><div class="ttdef"><b>Definition:</b> target_kind.h:330</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindNode_html_a496c8f36bc4ead9952b6a1fd369d20ad"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#a496c8f36bc4ead9952b6a1fd369d20ad">tvm::TargetKindNode::name</a></div><div class="ttdeci">String name</div><div class="ttdoc">Name of the target kind. </div><div class="ttdef"><b>Definition:</b> target_kind.h:78</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindAttrMap_html"><div class="ttname"><a href="classtvm_1_1TargetKindAttrMap.html">tvm::TargetKindAttrMap</a></div><div class="ttdoc">Map&lt;TargetKind, ValueType&gt; used to store meta-information about TargetKind. </div><div class="ttdef"><b>Definition:</b> target_kind.h:82</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a21152c83f61180dcb6293226a98025a8"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a21152c83f61180dcb6293226a98025a8">tvm::TargetKindRegEntry::set_target_parser</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_target_parser(FTVMTargetParser parser)</div><div class="ttdoc">Set the parsing function applied upon target creation. </div><div class="ttdef"><b>Definition:</b> target_kind.h:379</div></div>
+<div class="ttc" id="classtvm_1_1TargetKind_html_ae3c4bff01e4c03982e4b92b3352c6532"><div class="ttname"><a href="classtvm_1_1TargetKind.html#ae3c4bff01e4c03982e4b92b3352c6532">tvm::TargetKind::GetAttrMap</a></div><div class="ttdeci">static TargetKindAttrMap&lt; ValueType &gt; GetAttrMap(const String &amp;attr_name)</div><div class="ttdoc">Get the attribute map given the attribute name. </div><div class="ttdef"><b>Definition:</b> target_kind.h:347</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindNode_html_a496c8f36bc4ead9952b6a1fd369d20ad"><div class="ttname"><a href="classtvm_1_1TargetKindNode.html#a496c8f36bc4ead9952b6a1fd369d20ad">tvm::TargetKindNode::name</a></div><div class="ttdeci">String name</div><div class="ttdoc">Name of the target kind. </div><div class="ttdef"><b>Definition:</b> target_kind.h:88</div></div>
 <div class="ttc" id="object_8h_html_a782d0de62fbf75736e29c1e79c22c7f1"><div class="ttname"><a href="object_8h.html#a782d0de62fbf75736e29c1e79c22c7f1">TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:728</div></div>
-<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a00b1eb0ab1927210a6a519baecb3085e"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a00b1eb0ab1927210a6a519baecb3085e">tvm::TargetKindRegEntry::set_attrs_preprocessor</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_attrs_preprocessor(FLambda f)</div><div class="ttdoc">Set the pre-processing function applied upon target creation. </div><div class="ttdef"><b>Definition:</b> target_kind.h:355</div></div>
+<div class="ttc" id="classtvm_1_1TargetKindRegEntry_html_a00b1eb0ab1927210a6a519baecb3085e"><div class="ttname"><a href="classtvm_1_1TargetKindRegEntry.html#a00b1eb0ab1927210a6a519baecb3085e">tvm::TargetKindRegEntry::set_attrs_preprocessor</a></div><div class="ttdeci">TargetKindRegEntry &amp; set_attrs_preprocessor(FLambda f)</div><div class="ttdoc">Set the pre-processing function applied upon target creation. </div><div class="ttdef"><b>Definition:</b> target_kind.h:372</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/python/auto_scheduler.html b/docs/reference/api/python/auto_scheduler.html
index fe5e00f79..6f7d1aa1e 100644
--- a/docs/reference/api/python/auto_scheduler.html
+++ b/docs/reference/api/python/auto_scheduler.html
@@ -1597,7 +1597,7 @@ history states as starting point to perform Evolutionary Search).</p></li>
 
 <dl class="py class">
 <dt class="sig sig-object py" id="tvm.auto_scheduler.SketchPolicy">
-<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">tvm.auto_scheduler.</span></span><span class="sig-name descname"><span class="pre">SketchPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">program_cost_model</span></span><span class="o"><span class="pre">=</span></span><span class="defau [...]
+<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">tvm.auto_scheduler.</span></span><span class="sig-name descname"><span class="pre">SketchPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">program_cost_model</span></span><span class="o"><span class="pre">=</span></span><span class="defau [...]
 <dd><p>The search policy that searches in a hierarchical search space defined by sketches.
 The policy randomly samples programs from the space defined by sketches and use evolutionary
 search to fine-tune them.</p>
@@ -1881,7 +1881,7 @@ Candidates:
 
 <dl class="py function">
 <dt class="sig sig-object py" id="tvm.auto_scheduler.auto_schedule">
-<span class="sig-prename descclassname"><span class="pre">tvm.auto_scheduler.</span></span><span class="sig-name descname"><span class="pre">auto_schedule</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">search_policy</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em clas [...]
+<span class="sig-prename descclassname"><span class="pre">tvm.auto_scheduler.</span></span><span class="sig-name descname"><span class="pre">auto_schedule</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">search_policy</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em clas [...]
 <dd><p>THIS API IS DEPRECATED.</p>
 <p>Run auto scheduling search for a task.</p>
 <dl class="field-list simple">
diff --git a/docs/reference/api/typedoc/classes/bytestreamreader.html b/docs/reference/api/typedoc/classes/bytestreamreader.html
index 1e6db89a5..546d43bea 100644
--- a/docs/reference/api/typedoc/classes/bytestreamreader.html
+++ b/docs/reference/api/typedoc/classes/bytestreamreader.html
@@ -119,7 +119,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -141,7 +141,7 @@
 					<div class="tsd-signature tsd-kind-icon">bytes<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Uint8Array</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -151,7 +151,7 @@
 					<div class="tsd-signature tsd-kind-icon">offset<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 0</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/rpc_server.ts#L42">rpc_server.ts:42</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/rpc_server.ts#L42">rpc_server.ts:42</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -168,7 +168,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/rpc_server.ts#L63">rpc_server.ts:63</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/rpc_server.ts#L63">rpc_server.ts:63</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">Uint8Array</span></h4>
@@ -185,7 +185,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/rpc_server.ts#L49">rpc_server.ts:49</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/rpc_server.ts#L49">rpc_server.ts:49</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
@@ -202,7 +202,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/rpc_server.ts#L57">rpc_server.ts:57</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/rpc_server.ts#L57">rpc_server.ts:57</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
diff --git a/docs/reference/api/typedoc/classes/cachedcallstack.html b/docs/reference/api/typedoc/classes/cachedcallstack.html
index 09fbe0dda..33763af37 100644
--- a/docs/reference/api/typedoc/classes/cachedcallstack.html
+++ b/docs/reference/api/typedoc/classes/cachedcallstack.html
@@ -144,7 +144,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L223">memory.ts:223</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L223">memory.ts:223</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -172,7 +172,7 @@
 					<div class="tsd-signature tsd-kind-icon">temp<wbr>Args<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><a href="../interfaces/disposable.html" class="tsd-signature-type">Disposable</a><span class="tsd-signature-symbol">&gt;</span><span class="tsd-signature-symbol"> = []</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L208">memory.ts:208</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L208">memory.ts:208</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -194,7 +194,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L312">memory.ts:312</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L312">memory.ts:312</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -226,7 +226,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L284">memory.ts:284</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L284">memory.ts:284</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -262,7 +262,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L388">memory.ts:388</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L388">memory.ts:388</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -300,7 +300,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L376">memory.ts:376</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L376">memory.ts:376</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -340,7 +340,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L267">memory.ts:267</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L267">memory.ts:267</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -373,7 +373,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L243">memory.ts:243</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L243">memory.ts:243</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -390,7 +390,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L321">memory.ts:321</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L321">memory.ts:321</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -422,7 +422,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L252">memory.ts:252</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L252">memory.ts:252</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -444,7 +444,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L359">memory.ts:359</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L359">memory.ts:359</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -470,7 +470,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L342">memory.ts:342</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L342">memory.ts:342</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -496,7 +496,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L350">memory.ts:350</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L350">memory.ts:350</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -522,7 +522,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L326">memory.ts:326</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L326">memory.ts:326</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -548,7 +548,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L363">memory.ts:363</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L363">memory.ts:363</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -574,7 +574,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L346">memory.ts:346</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L346">memory.ts:346</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -600,7 +600,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L334">memory.ts:334</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L334">memory.ts:334</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
diff --git a/docs/reference/api/typedoc/classes/dldatatype.html b/docs/reference/api/typedoc/classes/dldatatype.html
index 81d28f7bf..ccc087c60 100644
--- a/docs/reference/api/typedoc/classes/dldatatype.html
+++ b/docs/reference/api/typedoc/classes/dldatatype.html
@@ -119,7 +119,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L262">runtime.ts:262</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L262">runtime.ts:262</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -147,7 +147,7 @@
 					<div class="tsd-signature tsd-kind-icon">bits<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L260">runtime.ts:260</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L260">runtime.ts:260</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -162,7 +162,7 @@
 					<div class="tsd-signature tsd-kind-icon">code<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L258">runtime.ts:258</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L258">runtime.ts:258</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -177,7 +177,7 @@
 					<div class="tsd-signature tsd-kind-icon">lanes<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L262">runtime.ts:262</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L262">runtime.ts:262</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -199,7 +199,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L279">runtime.ts:279</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L279">runtime.ts:279</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
@@ -216,7 +216,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L270">runtime.ts:270</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L270">runtime.ts:270</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">string</span></h4>
diff --git a/docs/reference/api/typedoc/classes/dldevice.html b/docs/reference/api/typedoc/classes/dldevice.html
index 7303db0c6..3423cbdf9 100644
--- a/docs/reference/api/typedoc/classes/dldevice.html
+++ b/docs/reference/api/typedoc/classes/dldevice.html
@@ -118,7 +118,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L202">runtime.ts:202</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L202">runtime.ts:202</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -146,7 +146,7 @@
 					<div class="tsd-signature tsd-kind-icon">device<wbr>Id<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L200">runtime.ts:200</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L200">runtime.ts:200</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -161,7 +161,7 @@
 					<div class="tsd-signature tsd-kind-icon">device<wbr>Type<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L198">runtime.ts:198</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L198">runtime.ts:198</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -183,7 +183,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L223">runtime.ts:223</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L223">runtime.ts:223</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -205,7 +205,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L230">runtime.ts:230</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L230">runtime.ts:230</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">string</span></h4>
diff --git a/docs/reference/api/typedoc/classes/environment.html b/docs/reference/api/typedoc/classes/environment.html
index 7882cd40c..243aabe20 100644
--- a/docs/reference/api/typedoc/classes/environment.html
+++ b/docs/reference/api/typedoc/classes/environment.html
@@ -125,7 +125,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/environment.ts#L86">environment.ts:86</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/environment.ts#L86">environment.ts:86</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -169,7 +169,7 @@
 					<aside class="tsd-sources">
 						<p>Implementation of <a href="../interfaces/libraryprovider.html">LibraryProvider</a>.<a href="../interfaces/libraryprovider.html#imports">imports</a></p>
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/environment.ts#L70">environment.ts:70</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/environment.ts#L70">environment.ts:70</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -179,7 +179,7 @@
 					<div class="tsd-signature tsd-kind-icon">logger<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>msg<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">void</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/environment.ts#L69">environment.ts:69</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/environment.ts#L69">environment.ts:69</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-type-declaration">
@@ -210,7 +210,7 @@
 					<div class="tsd-signature tsd-kind-icon">packedCFunc<wbr>Table<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">ctypes.FTVMWasmPackedCFunc</span><span class="tsd-signature-symbol"> | </span><span class="tsd-signature-type">undefined</span><span class="tsd-signature-symbol">&gt;</span><span class="tsd-signature-symbol"> = [undefined,]</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/environment.ts#L78">environment.ts:78</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/environment.ts#L78">environment.ts:78</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -228,7 +228,7 @@
 					<div class="tsd-signature tsd-kind-icon">packedCFunc<wbr>Table<wbr>Free<wbr>Id<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">number</span><span class="tsd-signature-symbol">&gt;</span><span class="tsd-signature-symbol"> = []</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/environment.ts#L84">environment.ts:84</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/environment.ts#L84">environment.ts:84</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -250,7 +250,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/environment.ts#L105">environment.ts:105</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/environment.ts#L105">environment.ts:105</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
diff --git a/docs/reference/api/typedoc/classes/ffilibrary.html b/docs/reference/api/typedoc/classes/ffilibrary.html
index 2ddcc8775..f016065a9 100644
--- a/docs/reference/api/typedoc/classes/ffilibrary.html
+++ b/docs/reference/api/typedoc/classes/ffilibrary.html
@@ -131,7 +131,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L49">runtime.ts:49</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L49">runtime.ts:49</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -156,7 +156,7 @@
 					<div class="tsd-signature tsd-kind-icon">exports<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Record</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">, </span><span class="tsd-signature-type">Function</span><span class="tsd-signature-symbol">&gt;</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L46">runtime.ts:46</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L46">runtime.ts:46</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -166,7 +166,7 @@
 					<div class="tsd-signature tsd-kind-icon">memory<span class="tsd-signature-symbol">:</span> <a href="memory.html" class="tsd-signature-type">Memory</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L45">runtime.ts:45</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L45">runtime.ts:45</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -176,7 +176,7 @@
 					<div class="tsd-signature tsd-kind-icon">wasm32<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">boolean</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L44">runtime.ts:44</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L44">runtime.ts:44</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -186,7 +186,7 @@
 					<div class="tsd-signature tsd-kind-icon">webGPUContext<span class="tsd-signature-symbol">:</span> <a href="webgpucontext.html" class="tsd-signature-type">WebGPUContext</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L47">runtime.ts:47</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L47">runtime.ts:47</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -203,7 +203,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L76">runtime.ts:76</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L76">runtime.ts:76</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -226,7 +226,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L66">runtime.ts:66</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L66">runtime.ts:66</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -243,7 +243,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L84">runtime.ts:84</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L84">runtime.ts:84</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <a href="cachedcallstack.html" class="tsd-signature-type">CachedCallStack</a></h4>
@@ -260,7 +260,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L95">runtime.ts:95</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L95">runtime.ts:95</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -283,7 +283,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L72">runtime.ts:72</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L72">runtime.ts:72</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
diff --git a/docs/reference/api/typedoc/classes/graphexecutor.html b/docs/reference/api/typedoc/classes/graphexecutor.html
index 45846c8e6..d06298280 100644
--- a/docs/reference/api/typedoc/classes/graphexecutor.html
+++ b/docs/reference/api/typedoc/classes/graphexecutor.html
@@ -130,7 +130,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L583">runtime.ts:583</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L583">runtime.ts:583</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -162,7 +162,7 @@
 					<div class="tsd-signature tsd-kind-icon">module<span class="tsd-signature-symbol">:</span> <a href="module.html" class="tsd-signature-type">Module</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L579">runtime.ts:579</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L579">runtime.ts:579</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -179,7 +179,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L654">runtime.ts:654</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L654">runtime.ts:654</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -224,7 +224,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L597">runtime.ts:597</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L597">runtime.ts:597</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -241,7 +241,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L631">runtime.ts:631</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L631">runtime.ts:631</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -279,7 +279,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L644">runtime.ts:644</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L644">runtime.ts:644</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -310,7 +310,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L621">runtime.ts:621</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L621">runtime.ts:621</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -332,7 +332,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L609">runtime.ts:609</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L609">runtime.ts:609</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
diff --git a/docs/reference/api/typedoc/classes/instance.html b/docs/reference/api/typedoc/classes/instance.html
index b62815cc9..eb105dfd1 100644
--- a/docs/reference/api/typedoc/classes/instance.html
+++ b/docs/reference/api/typedoc/classes/instance.html
@@ -139,7 +139,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L692">runtime.ts:692</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L692">runtime.ts:692</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -202,7 +202,7 @@
 					<div class="tsd-signature tsd-kind-icon">exports<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Record</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">, </span><span class="tsd-signature-type">Function</span><span class="tsd-signature-symbol">&gt;</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L684">runtime.ts:684</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L684">runtime.ts:684</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -212,7 +212,7 @@
 					<div class="tsd-signature tsd-kind-icon">memory<span class="tsd-signature-symbol">:</span> <a href="memory.html" class="tsd-signature-type">Memory</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L683">runtime.ts:683</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L683">runtime.ts:683</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -229,7 +229,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L932">runtime.ts:932</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L932">runtime.ts:932</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -260,7 +260,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L994">runtime.ts:994</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L994">runtime.ts:994</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -303,7 +303,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L924">runtime.ts:924</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L924">runtime.ts:924</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -341,7 +341,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L732">runtime.ts:732</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L732">runtime.ts:732</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -358,7 +358,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L952">runtime.ts:952</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L952">runtime.ts:952</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -402,7 +402,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L816">runtime.ts:816</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L816">runtime.ts:816</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -434,7 +434,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L1033">runtime.ts:1033</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L1033">runtime.ts:1033</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -465,7 +465,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L846">runtime.ts:846</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L846">runtime.ts:846</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -497,7 +497,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L750">runtime.ts:750</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L750">runtime.ts:750</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -520,7 +520,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L1013">runtime.ts:1013</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L1013">runtime.ts:1013</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -568,7 +568,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L789">runtime.ts:789</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L789">runtime.ts:789</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -608,7 +608,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L914">runtime.ts:914</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L914">runtime.ts:914</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -646,7 +646,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L1140">runtime.ts:1140</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L1140">runtime.ts:1140</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -698,7 +698,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L740">runtime.ts:740</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L740">runtime.ts:740</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -722,7 +722,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L868">runtime.ts:868</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L868">runtime.ts:868</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -754,7 +754,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L857">runtime.ts:857</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L857">runtime.ts:857</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -786,7 +786,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/runtime.ts#L940">runtime.ts:940</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/runtime.ts#L940">runtime.ts:940</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
diff --git a/docs/reference/api/typedoc/classes/memory.html b/docs/reference/api/typedoc/classes/memory.html
index dac49787f..a6ea64ea5 100644
--- a/docs/reference/api/typedoc/classes/memory.html
+++ b/docs/reference/api/typedoc/classes/memory.html
@@ -130,7 +130,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L40">memory.ts:40</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L40">memory.ts:40</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -152,7 +152,7 @@
 					<div class="tsd-signature tsd-kind-icon">memory<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Memory</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L32">memory.ts:32</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L32">memory.ts:32</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -162,7 +162,7 @@
 					<div class="tsd-signature tsd-kind-icon">wasm32<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">boolean</span><span class="tsd-signature-symbol"> = true</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L33">memory.ts:33</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L33">memory.ts:33</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -179,7 +179,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L154">memory.ts:154</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L154">memory.ts:154</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -210,7 +210,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L90">memory.ts:90</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L90">memory.ts:90</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -233,7 +233,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L97">memory.ts:97</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L97">memory.ts:97</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -256,7 +256,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L74">memory.ts:74</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L74">memory.ts:74</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -279,7 +279,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c79b8f1aa/web/src/memory.ts#L81">memory.ts:81</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/6bad21e9f/web/src/memory.ts#L81">memory.ts:81</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
... 2097 lines suppressed ...