You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2023/01/07 05:51:34 UTC

[tvm-site] branch asf-site updated: deploying docs (apache/tvm@088bc118c7a0abd263b634dc88be59813652251c)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 38c756ae22 deploying docs (apache/tvm@088bc118c7a0abd263b634dc88be59813652251c)
38c756ae22 is described below

commit 38c756ae2281e2d604cd9c622cac78ac97bad1d6
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Sat Jan 7 05:51:26 2023 +0000

    deploying docs (apache/tvm@088bc118c7a0abd263b634dc88be59813652251c)
---
 docs/_images/sphx_glr_micro_train_001.png          |  Bin 335230 -> 298784 bytes
 docs/_images/sphx_glr_micro_train_thumb.png        |  Bin 23974 -> 22856 bytes
 .../how_to/compile_models/from_darknet.rst.txt     |    2 +-
 .../how_to/compile_models/from_keras.rst.txt       |    2 +-
 .../how_to/compile_models/from_mxnet.rst.txt       |    2 +-
 .../how_to/compile_models/from_oneflow.rst.txt     |    2 +-
 .../how_to/compile_models/from_pytorch.rst.txt     |    2 +-
 .../how_to/compile_models/from_tensorflow.rst.txt  |    2 +-
 .../compile_models/sg_execution_times.rst.txt      |   22 +-
 .../deploy_models/deploy_model_on_adreno.rst.txt   |    7 +-
 .../deploy_models/deploy_model_on_android.rst.txt  |    2 +-
 .../deploy_object_detection_pytorch.rst.txt        |    4 +-
 .../deploy_models/deploy_prequantized.rst.txt      |    6 +-
 .../deploy_prequantized_tflite.rst.txt             |    4 +-
 .../how_to/deploy_models/deploy_quantized.rst.txt  |    2 +-
 .../deploy_models/deploy_ssd_gluoncv.rst.txt       |    4 +-
 .../deploy_models/sg_execution_times.rst.txt       |   22 +-
 .../extend_tvm/bring_your_own_datatypes.rst.txt    |    2 +-
 .../how_to/extend_tvm/sg_execution_times.rst.txt   |   10 +-
 .../how_to/extend_tvm/use_pass_instrument.rst.txt  |   16 +-
 .../optimize_operators/opt_conv_cuda.rst.txt       |    2 +-
 .../optimize_operators/opt_conv_tensorcore.rst.txt |    2 +-
 .../how_to/optimize_operators/opt_gemm.rst.txt     |   16 +-
 .../optimize_operators/sg_execution_times.rst.txt  |    8 +-
 .../sg_execution_times.rst.txt                     |   14 +-
 .../tune_conv2d_layer_cuda.rst.txt                 | 1154 +++-
 .../tune_network_cuda.rst.txt                      |    4 +-
 .../tune_network_x86.rst.txt                       |    4 +-
 .../tune_sparse_x86.rst.txt                        |   20 +-
 .../tune_with_autotvm/sg_execution_times.rst.txt   |    8 +-
 .../tune_with_autotvm/tune_conv2d_cuda.rst.txt     |  215 +-
 .../work_with_microtvm/micro_autotune.rst.txt      |   16 +-
 .../work_with_microtvm/micro_pytorch.rst.txt       |    4 +-
 .../how_to/work_with_microtvm/micro_train.rst.txt  |   18 +-
 .../work_with_microtvm/sg_execution_times.rst.txt  |   14 +-
 .../work_with_relay/sg_execution_times.rst.txt     |    8 +-
 .../how_to/work_with_schedules/intrin_math.rst.txt |    2 +-
 .../work_with_schedules/sg_execution_times.rst.txt |   16 +-
 .../how_to/work_with_schedules/tensorize.rst.txt   |    2 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    6 +-
 .../frontend/deploy_classification.rst.txt         |    2 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    6 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../topic/vta/tutorials/sg_execution_times.rst.txt |    6 +-
 .../tutorial/auto_scheduler_matmul_x86.rst.txt     |   13 +-
 docs/_sources/tutorial/autotvm_matmul_x86.rst.txt  |   20 +-
 docs/_sources/tutorial/autotvm_relay_x86.rst.txt   |   59 +-
 .../tutorial/cross_compilation_and_rpc.rst.txt     |    2 +-
 docs/_sources/tutorial/intro_topi.rst.txt          |    2 +-
 docs/_sources/tutorial/sg_execution_times.rst.txt  |   26 +-
 .../tutorial/tensor_expr_get_started.rst.txt       |   42 +-
 docs/commit_hash                                   |    2 +-
 docs/how_to/compile_models/from_darknet.html       |    2 +-
 docs/how_to/compile_models/from_keras.html         |    2 +-
 docs/how_to/compile_models/from_mxnet.html         |    2 +-
 docs/how_to/compile_models/from_oneflow.html       |   12 +-
 docs/how_to/compile_models/from_pytorch.html       |   10 +-
 docs/how_to/compile_models/from_tensorflow.html    |    2 +-
 docs/how_to/compile_models/sg_execution_times.html |   22 +-
 .../deploy_models/deploy_model_on_adreno.html      |    3 +-
 .../deploy_models/deploy_model_on_android.html     |    2 +-
 .../deploy_object_detection_pytorch.html           |   38 +-
 docs/how_to/deploy_models/deploy_prequantized.html |    7 +-
 .../deploy_models/deploy_prequantized_tflite.html  |    4 +-
 docs/how_to/deploy_models/deploy_quantized.html    |    2 +-
 docs/how_to/deploy_models/deploy_ssd_gluoncv.html  |   38 +-
 docs/how_to/deploy_models/sg_execution_times.html  |   30 +-
 .../extend_tvm/bring_your_own_datatypes.html       |    2 +-
 docs/how_to/extend_tvm/sg_execution_times.html     |   10 +-
 docs/how_to/extend_tvm/use_pass_instrument.html    |   16 +-
 docs/how_to/optimize_operators/opt_conv_cuda.html  |    2 +-
 .../optimize_operators/opt_conv_tensorcore.html    |    2 +-
 docs/how_to/optimize_operators/opt_gemm.html       |   16 +-
 .../optimize_operators/sg_execution_times.html     |    8 +-
 .../sg_execution_times.html                        |   14 +-
 .../tune_conv2d_layer_cuda.html                    | 1154 +++-
 .../tune_with_autoscheduler/tune_network_cuda.html |    4 +-
 .../tune_with_autoscheduler/tune_network_x86.html  |    4 +-
 .../tune_with_autoscheduler/tune_sparse_x86.html   |   20 +-
 .../tune_with_autotvm/sg_execution_times.html      |    8 +-
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.html |  215 +-
 docs/how_to/work_with_microtvm/micro_autotune.html |   16 +-
 docs/how_to/work_with_microtvm/micro_pytorch.html  |    4 +-
 docs/how_to/work_with_microtvm/micro_train.html    |   16 +-
 .../work_with_microtvm/sg_execution_times.html     |   14 +-
 .../how_to/work_with_relay/sg_execution_times.html |    8 +-
 docs/how_to/work_with_schedules/intrin_math.html   |    2 +-
 .../work_with_schedules/sg_execution_times.html    |   16 +-
 docs/how_to/work_with_schedules/tensorize.html     |    2 +-
 docs/install/nnpack.html                           |   12 +-
 docs/reference/api/doxygen/classes.html            |   40 +-
 .../doxygen/classtvm_1_1runtime_1_1ObjectRef.html  |    2 +-
 ...asstvm_1_1runtime_1_1ObjectRef__coll__graph.svg |   12 +-
 ...asstvm_1_1tir_1_1DataTypeLegalizer-members.html |   75 +-
 .../classtvm_1_1tir_1_1DataTypeLegalizer.html      |  128 +-
 ...vm_1_1tir_1_1DataTypeLegalizer__coll__graph.svg |  117 +-
 ...1_1tir_1_1DataTypeLegalizer__inherit__graph.svg |   36 +-
 ...sstvm_1_1tir_1_1ExprMutator__inherit__graph.svg |   36 +-
 ..._1_1tir_1_1IndexDataTypeNormalizer-members.html |    4 +-
 ...classtvm_1_1tir_1_1IndexDataTypeNormalizer.html |   10 +-
 ...tir_1_1IndexDataTypeNormalizer__coll__graph.svg |  359 +-
 ..._1_1IndexDataTypeNormalizer__inherit__graph.svg |   36 +-
 ...vm_1_1tir_1_1IndexDataTypeRewriter-members.html |   30 +-
 .../classtvm_1_1tir_1_1IndexDataTypeRewriter.html  |   32 +-
 ..._1tir_1_1IndexDataTypeRewriter__coll__graph.svg |  267 +-
 ...ir_1_1IndexDataTypeRewriter__inherit__graph.svg |   36 +-
 ...m_1_1tir_1_1StmtExprMutator__inherit__graph.svg |   36 +-
 ...sstvm_1_1tir_1_1StmtMutator__inherit__graph.svg |   36 +-
 .../doxygen/data__type__rewriter_8h_source.html    |   15 +-
 docs/reference/api/doxygen/functions_func_v.html   |   30 +-
 docs/reference/api/doxygen/functions_v.html        |   42 +-
 docs/reference/api/doxygen/functions_vars_v.html   |    2 +-
 docs/reference/api/doxygen/hierarchy.html          |  815 ++-
 docs/reference/api/doxygen/inherit_graph_101.svg   |    8 +-
 docs/reference/api/doxygen/inherit_graph_118.svg   |   32 +-
 docs/reference/api/doxygen/inherit_graph_12.svg    |   16 +-
 docs/reference/api/doxygen/inherit_graph_127.svg   | 6361 ++++++++++----------
 docs/reference/api/doxygen/inherit_graph_212.svg   |   16 +-
 docs/reference/api/doxygen/inherit_graph_213.svg   |   16 +-
 docs/reference/api/doxygen/inherit_graph_42.svg    |   16 +-
 docs/reference/api/doxygen/inherit_graph_46.svg    |    8 +-
 docs/reference/api/doxygen/inherits.html           |    2 +-
 docs/reference/api/doxygen/search/all_13.js        |    2 +-
 docs/reference/api/doxygen/search/all_14.js        |    6 +-
 docs/reference/api/doxygen/search/all_15.js        |    6 +-
 docs/reference/api/doxygen/search/all_17.js        |   10 +-
 docs/reference/api/doxygen/search/all_9.js         |    2 +-
 docs/reference/api/doxygen/search/all_d.js         |    2 +-
 docs/reference/api/doxygen/search/all_e.js         |    1 -
 docs/reference/api/doxygen/search/classes_10.js    |    6 +-
 docs/reference/api/doxygen/search/classes_11.js    |    6 +-
 docs/reference/api/doxygen/search/classes_13.js    |    2 +-
 docs/reference/api/doxygen/search/classes_7.js     |    2 +-
 docs/reference/api/doxygen/search/classes_9.js     |    2 +-
 docs/reference/api/doxygen/search/classes_a.js     |    1 -
 docs/reference/api/doxygen/search/classes_f.js     |    2 +-
 docs/reference/api/doxygen/search/functions_16.js  |    4 +-
 docs/reference/api/doxygen/search/variables_14.js  |    2 +-
 docs/reference/api/python/auto_scheduler.html      |    4 +-
 .../api/typedoc/classes/bytestreamreader.html      |   12 +-
 .../api/typedoc/classes/cachedcallstack.html       |   34 +-
 docs/reference/api/typedoc/classes/dldatatype.html |   12 +-
 docs/reference/api/typedoc/classes/dldevice.html   |   10 +-
 .../reference/api/typedoc/classes/environment.html |   12 +-
 docs/reference/api/typedoc/classes/ffilibrary.html |   20 +-
 .../api/typedoc/classes/graphexecutor.html         |   16 +-
 docs/reference/api/typedoc/classes/instance.html   |   40 +-
 docs/reference/api/typedoc/classes/memory.html     |   34 +-
 docs/reference/api/typedoc/classes/module.html     |   10 +-
 docs/reference/api/typedoc/classes/ndarray.html    |   22 +-
 .../api/typedoc/classes/packedfunccell.html        |    6 +-
 docs/reference/api/typedoc/classes/rpcserver.html  |   14 +-
 docs/reference/api/typedoc/classes/scalar.html     |    6 +-
 .../api/typedoc/classes/webgpucontext.html         |   12 +-
 docs/reference/api/typedoc/enums/argtypecode.html  |   30 +-
 .../api/typedoc/enums/aynccallbackcode.html        |    4 +-
 .../api/typedoc/enums/dldatatypecode.html          |    8 +-
 .../api/typedoc/enums/rpcserverstate.html          |   12 +-
 docs/reference/api/typedoc/enums/sizeof.html       |   18 +-
 docs/reference/api/typedoc/index.html              |  112 +-
 .../api/typedoc/interfaces/disposable.html         |    2 +-
 .../api/typedoc/interfaces/functioninfo.html       |    6 +-
 .../api/typedoc/interfaces/libraryprovider.html    |    4 +-
 docs/searchindex.js                                |    2 +-
 .../vta/tutorials/autotvm/sg_execution_times.html  |    6 +-
 .../tutorials/frontend/deploy_classification.html  |    2 +-
 .../vta/tutorials/frontend/deploy_detection.html   |    2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |   10 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    6 +-
 docs/topic/vta/tutorials/sg_execution_times.html   |    6 +-
 docs/tutorial/auto_scheduler_matmul_x86.html       |    8 +-
 docs/tutorial/autotvm_matmul_x86.html              |   20 +-
 docs/tutorial/autotvm_relay_x86.html               |  269 +-
 docs/tutorial/cross_compilation_and_rpc.html       |    2 +-
 docs/tutorial/intro_topi.html                      |    2 +-
 docs/tutorial/sg_execution_times.html              |   28 +-
 docs/tutorial/tensor_expr_get_started.html         |   42 +-
 178 files changed, 7329 insertions(+), 5838 deletions(-)

diff --git a/docs/_images/sphx_glr_micro_train_001.png b/docs/_images/sphx_glr_micro_train_001.png
index 4730ebaecb..fb3c2850a3 100644
Binary files a/docs/_images/sphx_glr_micro_train_001.png and b/docs/_images/sphx_glr_micro_train_001.png differ
diff --git a/docs/_images/sphx_glr_micro_train_thumb.png b/docs/_images/sphx_glr_micro_train_thumb.png
index 4f63c99e35..86defffe09 100644
Binary files a/docs/_images/sphx_glr_micro_train_thumb.png and b/docs/_images/sphx_glr_micro_train_thumb.png differ
diff --git a/docs/_sources/how_to/compile_models/from_darknet.rst.txt b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
index 0bc8c99c7f..bf6c262653 100644
--- a/docs/_sources/how_to/compile_models/from_darknet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
@@ -319,7 +319,7 @@ The process is no different from other examples.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  6.880 seconds)
+   **Total running time of the script:** ( 1 minutes  10.786 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_darknet.py:
diff --git a/docs/_sources/how_to/compile_models/from_keras.rst.txt b/docs/_sources/how_to/compile_models/from_keras.rst.txt
index 88a5498f1d..6ef54fb3ea 100644
--- a/docs/_sources/how_to/compile_models/from_keras.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_keras.rst.txt
@@ -232,7 +232,7 @@ Look up prediction top 1 index in 1000 class synset.
  .. code-block:: none
 
     Relay top-1 id: 285, class name: Egyptian cat
-
    1/1 [==============================] - ETA: 0s
    1/1 [==============================] - 1s 933ms/step
+
    1/1 [==============================] - ETA: 0s
    1/1 [==============================] - 1s 954ms/step
     Keras top-1 id: 285, class name: Egyptian cat
 
 
diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index 267a11b602..518ae85397 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -116,7 +116,7 @@ In this section, we download a pretrained imagenet model and classify an image.
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip5b2e833d-6156-4871-b906-654707010ba8 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip1e507af6-40d4-4cf9-96c2-f151e3077678 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
     x (1, 3, 224, 224)
 
 
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index df8666ff17..d5d94d6185 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -121,7 +121,7 @@ Load a pretrained OneFlow model and save model
  .. code-block:: none
 
     Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     23%|##2       | 9.38M/41.5M [00:00<00:00, 98.4MB/s]
     45%|####5     | 18.8M/41.5M [00:00<00:00, 87.4MB/s]
     66%|######5   | 27.2M/41.5M [00:00<00:00, 75.4MB/s]
     83%|########3 | 34.5M/41.5M [00:00<00:00, 75.5MB/s]
    100%|##########| 41.5M/41.5M [00:00<00:00, 68.7MB/s]
+
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     15%|#5        | 6.33M/41.5M [00:00<00:00, 55.2MB/s]
     28%|##7       | 11.6M/41.5M [00:00<00:00, 55.1MB/s]
     41%|####      | 16.9M/41.5M [00:00<00:00, 52.5MB/s]
     58%|#####7    | 24.0M/41.5M [00:00<00:00, 51.2MB/s]
     76%|#######6  | 31.7M/41.5M [00:00<00:00, 58.7MB/s]
     91%|######### | 37.6M/41.5M [00:00<00:00, 59.7MB/s]
    100%|##########| 41.5M/41.5M [00:00<00:00, 55.1MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index eee4883552..a855cf906d 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -102,7 +102,7 @@ Load a pretrained PyTorch model
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
       warnings.warn(msg)
     Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     28%|##7       | 12.3M/44.7M [00:00<00:00, 129MB/s]
     55%|#####5    | 24.6M/44.7M [00:00<00:00, 109MB/s]
     79%|#######8  | 35.2M/44.7M [00:00<00:00, 106MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 106MB/s]
+
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     18%|#7        | 7.99M/44.7M [00:00<00:00, 76.3MB/s]
     40%|####      | 18.1M/44.7M [00:00<00:00, 89.9MB/s]
     60%|#####9    | 26.6M/44.7M [00:00<00:00, 88.5MB/s]
     80%|#######9  | 35.6M/44.7M [00:00<00:00, 90.4MB/s]
     99%|#########8| 44.2M/44.7M [00:00<00:00, 89.7MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 89.4MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 55c9576734..d4c22bc01a 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -425,7 +425,7 @@ Run the corresponding model on tensorflow
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  10.111 seconds)
+   **Total running time of the script:** ( 1 minutes  11.754 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index 3cd4e3c06b..5e3f72cc3c 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**05:33.870** total execution time for **how_to_compile_models** files:
+**05:50.437** total execution time for **how_to_compile_models** files:
 
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:10.111 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:11.754 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:06.880 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:10.786 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:45.810 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:49.910 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:31.605 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:34.675 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:28.422 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:30.955 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:25.666 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:26.373 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:24.498 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:24.948 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:22.170 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:22.015 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:16.280 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:16.634 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.426 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.385 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt
index 5f020b1cea..0e11b8e981 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt
@@ -728,18 +728,13 @@ well as provides information about the model's performance
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-     3340.8585    3339.3044    3354.8016    3335.9312      5.3039   
+     2846.1168    2831.2393    2933.0375    2802.9358     45.3458   
                
 
 
 
 
 
-.. rst-class:: sphx-glr-timing
-
-   **Total running time of the script:** ( 1 minutes  0.638 seconds)
-
-
 .. _sphx_glr_download_how_to_deploy_models_deploy_model_on_adreno.py:
 
 .. only:: html
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index e525bbf6c7..c6d778845b 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -437,7 +437,7 @@ Execute on TVM
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      15.8475      15.6642      16.5656      15.4942       0.3540   
+      16.8016      16.6977      17.8308      16.6099       0.3473   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index bdc72328f3..b980b36ef1 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -131,7 +131,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=MaskRCNN_ResNet50_FPN_Weights.COCO_V1`. You can also use `weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT` to get the most up-to-date weights.
       warnings.warn(msg)
     Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
      0%|          | 0.00/170M [00:00<?, ?B/s]
      7%|7         | 12.1M/170M [00:00<00:01, 127MB/s]
     14%|#4        | 24.2M/170M [00:00<00:01, 109MB/s]
     20%|##        | 34.8M/170M [00:00<00:01, 106MB/s]
     26%|##6       | 44.9M/170M [00:00<00:01, 104MB/s]
     32%|###2      | 54.9M/170M [00:00<00:01, 102MB/s]
     38%|###8      | 64.6M/170M [00:00<00:01, 96.6MB/s]
     45%|####4     | 75.7M/170M [00:00<00:00, 102MB/s] 
     50%|#####     | 85.6M/170M [00:00<00:00, 102MB/s]
     56%|#####6    | 95.3M/170M [00:00<00:00, 101MB/s]
     62%|######1   | 105M/170M [00:01<00:00, 95.3MB/s]
     69%|######8   | 116M/170M [00:01<00:00, 102MB/s] 
     74%|#######4  | 126M/170M [00:01<00:00, 102MB/s]
     80%|########  | 136M/170M [00:01<00:00, 95.7MB/s]
     87%|########6 | 148M/170M [00:01<00:00, 103MB/s] 
     93%|#########2| 157M/170M [00:01<00:00, 101MB/s]
     98%|#########8| 167M/170M [00:01<00:00, 101MB/s]
    100%|##########| 170M/170M [00:01<00:00, 101MB/s]
+
      0%|          | 0.00/170M [00:00<?, ?B/s]
      5%|4         | 7.99M/170M [00:00<00:03, 49.2MB/s]
      9%|9         | 16.1M/170M [00:00<00:02, 65.1MB/s]
     14%|#4        | 24.1M/170M [00:00<00:02, 72.5MB/s]
     19%|#8        | 32.1M/170M [00:00<00:01, 73.7MB/s]
     24%|##3       | 40.1M/170M [00:00<00:01, 76.0MB/s]
     28%|##8       | 48.0M/170M [00:00<00:01, 78.2MB/s]
     38%|###7      | 64.0M/170M [00:00<00:01, 87.8MB/s]
     44%|####3     | 74.1M/170M [00:00<00:01, 92.6MB/s]
     49%|####9     | 83.7M/170M [00:01<00:00, 94.9MB/s]
     55%|#####4    | 92.8M/170M [00:01<00:00, 93.1MB/s]
     60%|#####9    | 102M/170M [00:01<00:00, 80.2MB/s] 
     65%|######4   | 110M/170M [00:01<00:00, 66.2MB/s]
     71%|#######   | 120M/170M [00:01<00:00, 68.0MB/s]
     78%|#######7  | 132M/170M [00:01<00:00, 81.7MB/s]
     83%|########2 | 141M/170M [00:01<00:00, 79.7MB/s]
     88%|########7 | 149M/170M [00:02<00:00, 73.9MB/s]
     92%|#########1| 156M/170M [00:02<00:00, 73.3MB/s]
   
   98%|#########7| 166M/170M [00:02<00:00, 82.0MB/s]
    100%|##########| 170M/170M [00:02<00:00, 76.7MB/s]
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torch/nn/functional.py:3897: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
       for i in range(dim)
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/detection/anchor_utils.py:124: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -300,7 +300,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  7.575 seconds)
+   **Total running time of the script:** ( 3 minutes  15.325 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index add8bc8432..f84183c28f 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -240,7 +240,7 @@ training. Other models require a full post training calibration.
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=MobileNet_V2_Weights.IMAGENET1K_V1`. You can also use `weights=MobileNet_V2_Weights.DEFAULT` to get the most up-to-date weights.
       warnings.warn(msg)
     Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 149MB/s]
+
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     59%|#####8    | 7.99M/13.6M [00:00<00:00, 81.8MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 102MB/s] 
 
 
 
@@ -422,7 +422,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      90.0417      89.9371      93.6422      89.7781       0.4685   
+      94.9061      94.5728      99.8493      89.9776       3.2943   
                
 
 
@@ -471,7 +471,7 @@ TODO
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  4.745 seconds)
+   **Total running time of the script:** ( 1 minutes  6.252 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index 43f4de2a89..1ec706758c 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -436,7 +436,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      118.5336     118.5244     124.8535     116.4806      1.0182   
+      120.3976     120.3663     123.8272     119.6521      0.4755   
                
 
 
@@ -473,7 +473,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  34.865 seconds)
+   **Total running time of the script:** ( 2 minutes  24.856 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index 8ef23d1187..9389f876f8 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -257,7 +257,7 @@ We create a Relay VM to build and execute the model.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  28.151 seconds)
+   **Total running time of the script:** ( 1 minutes  27.633 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index 853b1a6679..94c5d4c7f0 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -170,7 +170,7 @@ Convert and compile model for CPU.
             data: None
       input_sym_arg_type = in_param.infer_type()[0]
     Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|3         | 4845/132723 [00:00<00:02, 48441.85KB/s]
      9%|9         | 12273/132723 [00:00<00:01, 63638.23KB/s]
     15%|#5        | 19975/132723 [00:00<00:01, 69746.15KB/s]
     21%|##        | 27663/132723 [00:00<00:01, 72561.03KB/s]
     27%|##6       | 35358/132723 [00:00<00:01, 74141.90KB/s]
     32%|###2      | 43043/132723 [00:00<00:01, 75061.67KB/s]
     38%|###8      | 50775/132723 [00:00<00:01, 75798.55KB/s]
     44%|####4     | 58517/132723 [00:00<00:00, 76310.80KB/s]
     50%|####9     | 66263/132723 [00:00<00:00, 76667.55KB/s]
     56%|#####5    | 74045/132723 [00:01<00:00, 77021.35KB/s]
     62%|######1   | 81763/132723 [00:01<00:00, 77067.06KB/s]
     67%|######7   | 89511/132723 [00:01<00:00, 77188.55KB/s]
     73%|#######3  | 97258/132723 [00:01<00:00, 77271.70KB/s]
     79%|#######9  | 105030/132723 [00:01<00:00, 77402.61KB/s]
     85%|########4 | 112797/132723 [00:01<00:00, 77478.21KB/s]
     91%|#########
  | 120577/132723 [00:01<00:00, 77572.35KB/s]
     97%|#########6| 128410/132723 [00:01<00:00, 77795.38KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 75605.07KB/s]
+
      0%|          | 0/132723 [00:00<?, ?KB/s]
      2%|2         | 2717/132723 [00:00<00:04, 27153.97KB/s]
      8%|7         | 10158/132723 [00:00<00:02, 54943.45KB/s]
     14%|#3        | 18244/132723 [00:00<00:01, 66772.88KB/s]
     20%|#9        | 26332/132723 [00:00<00:01, 72337.72KB/s]
     26%|##5       | 34401/132723 [00:00<00:01, 75347.70KB/s]
     32%|###2      | 42565/132723 [00:00<00:01, 77485.04KB/s]
     38%|###8      | 50747/132723 [00:00<00:01, 78896.02KB/s]
     44%|####4     | 58915/132723 [00:00<00:00, 79781.15KB/s]
     51%|#####     | 67058/132723 [00:00<00:00, 80295.51KB/s]
     57%|#####6    | 75218/132723 [00:01<00:00, 80697.10KB/s]
     63%|######2   | 83423/132723 [00:01<00:00, 81109.68KB/s]
     69%|######9   | 91639/132723 [00:01<00:00, 81426.02KB/s]
     75%|#######5  | 99832/132723 [00:01<00:00, 81577.76KB/s]
     81%|########1 | 108029/132723 [00:01<00:00, 81693.08KB/s]
     88%|########7 | 116216/132723 [00:01<00:00, 81737.38KB/s]
     94%|#########
 3| 124390/132723 [00:01<00:00, 81411.82KB/s]
    100%|#########9| 132532/132723 [00:01<00:00, 80315.84KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 77563.74KB/s]
 
 
 
@@ -246,7 +246,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  3.701 seconds)
+   **Total running time of the script:** ( 3 minutes  20.215 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index 4ae6734fa3..bed37c8042 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**13:44.485** total execution time for **how_to_deploy_models** files:
+**14:01.296** total execution time for **how_to_deploy_models** files:
 
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:07.575 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 03:20.215 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 03:03.701 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:15.325 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 02:34.865 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 02:24.856 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:28.151 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:27.633 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:04.745 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:06.252 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_adreno.py` (``deploy_model_on_adreno.py``)                   | 01:00.638 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_adreno.py` (``deploy_model_on_adreno.py``)                   | 00:56.856 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:34.717 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:38.381 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_nano.py` (``deploy_model_on_nano.py``)                       | 00:25.260 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:26.751 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:24.827 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_nano.py` (``deploy_model_on_nano.py``)                       | 00:25.021 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)                                     | 00:00.007 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)                                     | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index 8a86004a43..606c4c7436 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -476,7 +476,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipa0a8e594-1da9-4714-9833-f7e4fd889ea3 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip9dcdd43b-c5e0-42fb-9396-dfe32c896d74 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 
 
 
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index f5dfeac1ef..734c1db2d2 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:46.260** total execution time for **how_to_extend_tvm** files:
+**00:47.640** total execution time for **how_to_extend_tvm** files:
 
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:42.925 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:43.843 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.327 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.677 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:01.000 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:01.111 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.007 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.008 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index 9e5808049c..0bac6be275 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -220,10 +220,10 @@ profile the execution time of each passes.
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 7092us [7092us] (46.21%; 46.21%)
-    FoldScaleAxis: 8256us [7us] (53.79%; 53.79%)
-            FoldConstant: 8250us [1701us] (53.75%; 99.92%)
-                    InferType: 6549us [6549us] (42.67%; 79.39%)
+    InferType: 7870us [7870us] (46.52%; 46.52%)
+    FoldScaleAxis: 9047us [7us] (53.48%; 53.48%)
+            FoldConstant: 9040us [1849us] (53.44%; 99.92%)
+                    InferType: 7191us [7191us] (42.51%; 79.54%)
 
 
 
@@ -262,10 +262,10 @@ Refer to following sections and :py:func:`tvm.instrument.pass_instrument` for th
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6621us [6621us] (45.31%; 45.31%)
-    FoldScaleAxis: 7992us [5us] (54.69%; 54.69%)
-            FoldConstant: 7987us [1682us] (54.66%; 99.94%)
-                    InferType: 6305us [6305us] (43.15%; 78.94%)
+    InferType: 7257us [7257us] (44.60%; 44.60%)
+    FoldScaleAxis: 9013us [5us] (55.40%; 55.40%)
+            FoldConstant: 9008us [1905us] (55.37%; 99.95%)
+                    InferType: 7103us [7103us] (43.66%; 78.85%)
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index 9fa8a5202c..b44a240701 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -344,7 +344,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 35.913761 ms
+    Convolution: 54.166305 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index 12f86b05f1..5c6d49843a 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -661,7 +661,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 13.342461 ms
+    conv2d with tensor core: 13.359718 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index efbfe428b6..ce1ed55a56 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -147,8 +147,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.018220
-    Baseline: 3.325094
+    Numpy running time: 0.020501
+    Baseline: 3.317244
 
 
 
@@ -242,7 +242,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.296584
+    Opt1: 0.328635
 
 
 
@@ -344,7 +344,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.329013
+    Opt2: 0.346811
 
 
 
@@ -439,7 +439,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.113831
+    Opt3: 0.126123
 
 
 
@@ -563,7 +563,7 @@ flattening.
 
  .. code-block:: none
 
-    Opt4: 0.109471
+    Opt4: 0.119191
 
 
 
@@ -684,7 +684,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.110779
+    Opt5: 0.121588
 
 
 
@@ -808,7 +808,7 @@ Furthermore, we can also utilize multi-core processors to do the thread-level pa
 
  .. code-block:: none
 
-    Opt6: 0.146559
+    Opt6: 0.156732
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index 49513e5246..df276e8f8b 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:34.501** total execution time for **how_to_optimize_operators** files:
+**00:36.448** total execution time for **how_to_optimize_operators** files:
 
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:31.855 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:33.768 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.541 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.579 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.105 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.102 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index 4c7fce352c..085d0c3623 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,18 +5,18 @@
 
 Computation times
 =================
-**09:02.544** total execution time for **how_to_tune_with_autoscheduler** files:
+**09:17.784** total execution time for **how_to_tune_with_autoscheduler** files:
 
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 05:41.286 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 05:46.184 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:30.702 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:33.595 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 01:00.923 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 01:03.655 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:26.801 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:29.103 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:11.840 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:13.153 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:10.991 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:12.093 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index 7fd20a997e..d442c3fd40 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -243,163 +243,484 @@ cooperative fetching, unrolling and operator fusion.
                  bias: Buffer(bias_2: Pointer(float32), float32, [1, 512, 1, 1], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [1, 512, 7, 7], [])}
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute} {
-      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 64;
-      allocate(conv2d_nchw: Pointer(local float32), float32, [4]), storage_scope = local;
-      allocate(pad_temp.shared: Pointer(shared float32), float32, [4032]), storage_scope = shared;
-      allocate(kernel.shared: Pointer(shared float32), float32, [1536]), storage_scope = shared;
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98 {
-        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [1], [], scope="local", align=4)[0] = 0f32
+      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 28;
+      allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
+      allocate(pad_temp.shared: Pointer(shared float32), float32, [72]), storage_scope = shared;
+      allocate(kernel.shared: Pointer(shared float32), float32, [3072]), storage_scope = shared;
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64 {
+        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope="local", align=32)[0] = 0f32
         conv2d_nchw_1[1] = 0f32
         conv2d_nchw_1[2] = 0f32
         conv2d_nchw_1[3] = 0f32
-        for (rc.outer.outer: int32, 0, 8) {
+        conv2d_nchw_1[4] = 0f32
+        conv2d_nchw_1[5] = 0f32
+        conv2d_nchw_1[6] = 0f32
+        conv2d_nchw_1[7] = 0f32
+        conv2d_nchw_1[8] = 0f32
+        conv2d_nchw_1[9] = 0f32
+        conv2d_nchw_1[10] = 0f32
+        conv2d_nchw_1[11] = 0f32
+        conv2d_nchw_1[12] = 0f32
+        conv2d_nchw_1[13] = 0f32
+        for (rc.outer.outer: int32, 0, 64) {
           for (ry.outer.outer: int32, 0, 3) {
-            let cse_var_4: int32 = (rc.outer.outer*3136)
-            let cse_var_3: int32 = (ry.outer.outer*7)
-            let cse_var_2: int32 = (rc.outer.outer*576)
+            let cse_var_2: int32 = (rc.outer.outer*72)
             let cse_var_1: int32 = (ry.outer.outer*3)
              {
-              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [4032], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else(((((1 <= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) && ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), data_3: Buffer(data_2, float32, [25088], [])[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) - 8)], 0f32 [...]
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 98)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 98), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 196)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 196), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 294)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 294), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 392), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 490)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 490), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 588)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 588), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 686)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 686), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 784)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 1), 9))) && (floormod((threadIdx.x_1 + 1), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 784), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 882)] = @tir.if_then_else(((((1 <= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) && ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), data_3[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) + 678)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 980)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 980), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 1078)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1078), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 1176)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1176), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 1274)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1274), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 1372)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1372), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 1470)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1470), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 1568)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1568), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 1666)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 1), 9))) && (floormod((threadIdx.x_1 + 1), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1666), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 1764)] = @tir.if_then_else(((((1 <= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) && ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), data_3[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) + 1364)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 1862)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1862), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 1960)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1960), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 2058)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2058), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 2156)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2156), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 2254)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2254), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 2352)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2352), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 2450)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2450), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 2548)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 1), 9))) && (floormod((threadIdx.x_1 + 1), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2548), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 2646)] = @tir.if_then_else(((((1 <= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) && ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), data_3[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) + 2050)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 2744)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2744), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 2842)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2842), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 2940)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2940), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 3038)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3038), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 3136)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3136), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 3234)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3234), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 3332)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3332), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 3430)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 1), 9))) && (floormod((threadIdx.x_1 + 1), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3430), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 3528)] = @tir.if_then_else(((((1 <= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) && ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), data_3[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) + 2736)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 3626)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3626), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 3724)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3724), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 3822)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3822), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              pad_temp.shared_1[(threadIdx.x_1 + 3920)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3920), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              if @tir.likely((threadIdx.x_1 < 14), dtype=bool) {
-                pad_temp.shared_1[(threadIdx.x_1 + 4018)] = @tir.if_then_else(((((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) < 8) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 4018), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-              }
-              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1: Buffer(kernel.shared, float32, [1536], [], scope="shared")[threadIdx.x_2] = kernel_3: Buffer(kernel_2, float32, [2359296], [])[(((((blockIdx.x*36864) + cse_var_2) + (floordiv(threadIdx.x_2, 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 98)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 98), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 98), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 196)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 196), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 4), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 294)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 294), 192)*4608)) + cse_var_2) + (floormod((floordiv(threadIdx.x_2, 3) + 34), 64)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 392)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 392), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 490)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 490), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 106), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 588)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 588), 192)*4608)) + cse_var_2) + ((floordiv(threadIdx.x_2, 3) + 4)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 686)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 686), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 110), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 784)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 784), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 882)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 882), 192)*4608)) + cse_var_2) + (floormod((floordiv(threadIdx.x_2, 3) + 38), 64)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 980)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 980), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 20), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 1078)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1078), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 118), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 1176)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1176), 192)*4608)) + cse_var_2) + ((floordiv(threadIdx.x_2, 3) + 8)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 1274)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1274), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 122), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              kernel.shared_1[(threadIdx.x_2 + 1372)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1372), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 28), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 98;
-              if @tir.likely((threadIdx.x_2 < 66), dtype=bool) {
-                kernel.shared_1[(threadIdx.x_2 + 1470)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1470), 192)*4608)) + cse_var_2) + ((floordiv(threadIdx.x_2, 3) + 42)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-              }
-              for (rc.inner: int32, 0, 64) {
-                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7))]*kernel.shared_1[((floordiv(threadIdx.x, 49)*192) + (rc.inner*3))]))
-                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7))]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 384)]))
-                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7))]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 768)]))
-                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7))]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 1152)]))
-                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 1)]))
-                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 385)]))
-                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 769)]))
-                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 1153)]))
-                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 2)]))
-                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 386)]))
-                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 770)]))
-                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 1154)]))
+              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64 {
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1: Buffer(pad_temp.shared, float32, [72], [], scope="shared")[(threadIdx.x_1*4)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), data_3: Buffer(data_2, float32, [25088], [])[((((((rc.outer.outer*392) + (floordiv((threadIdx.x_1*4), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + fl [...]
+                }
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 1), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
+                }
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 2), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
+                }
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 9))) && (floormod(((threadIdx.x_1*4) + 3), 9) < 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 3), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0f32, dtype=float32)
+                }
               }
+              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1: Buffer(kernel.shared, float32, [3072], [], scope="shared")[threadIdx.x_2] = kernel_3: Buffer(kernel_2, float32, [2359296], [])[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 64)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 64), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 128)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 128), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 192)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 36864)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 256)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 256), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 320)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 320), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 384)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 73728)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 448)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 448), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 512)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 512), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 576)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 110592)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 640)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 640), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 704)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 704), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 768)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 147456)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 832)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 832), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 896)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 896), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 960)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 184320)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1024), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1088), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1152)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 221184)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1216)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1216), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1280)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1280), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1408)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1408), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1472)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1472), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1536)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 294912)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1600)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1600), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1664)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1664), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1728)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 331776)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1792)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1792), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1856)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1856), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1920)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 368640)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1984)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1984), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2048)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2048), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2112)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 405504)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2176)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2176), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2240)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2240), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2304)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 442368)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2368)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2368), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2432)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2432), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2496)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 479232)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2560)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2560), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2624)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2624), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2688)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 516096)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2752)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2752), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2816)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2816), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2880)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 552960)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2944)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2944), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 3008)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 3008), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 47)]))
             }
           }
         }
-        compute_3: Buffer(compute_2, float32, [25088], [])[((blockIdx.x*392) + threadIdx.x)] = max((conv2d_nchw_1[0] + bias_3: Buffer(bias_2, float32, [512], [])[((blockIdx.x*8) + floordiv(threadIdx.x, 49))]), 0f32)
-        compute_3[(((blockIdx.x*392) + threadIdx.x) + 98)] = max((conv2d_nchw_1[1] + bias_3[(((blockIdx.x*8) + floordiv(threadIdx.x, 49)) + 2)]), 0f32)
-        compute_3[(((blockIdx.x*392) + threadIdx.x) + 196)] = max((conv2d_nchw_1[2] + bias_3[(((blockIdx.x*8) + floordiv(threadIdx.x, 49)) + 4)]), 0f32)
-        compute_3[(((blockIdx.x*392) + threadIdx.x) + 294)] = max((conv2d_nchw_1[3] + bias_3[(((blockIdx.x*8) + floordiv(threadIdx.x, 49)) + 6)]), 0f32)
+        for (i1.inner: int32, 0, 2) {
+          for (i3.inner: int32, 0, 7) {
+            compute_3: Buffer(compute_2, float32, [25088], [])[(((((floordiv(blockIdx.x, 7)*6272) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias_3: Buffer(bias_2, float32, [512], [])[(((floordiv(blockIdx.x, 7)*128) + (threadIdx.x*2)) + i1.inner)]), 0f32)
+          }
+        }
       }
     }
 
@@ -453,7 +774,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.280 ms
+    Execution time of this operator: 0.351 ms
 
 
 
@@ -502,35 +823,35 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
     conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
-    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=2)
-    conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=4)
+    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
+    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
+    conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
     conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
     conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
+    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
     conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
     conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
-    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
+    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
+    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
     conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=64)
-    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=1)
+    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
     conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
     conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
-    conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=3)
-    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
+    conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
+    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
     s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2 [...]
     compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
     compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
     compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
-    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=2)
-    compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=4)
+    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
+    compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
     compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
-    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
+    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
     compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
-    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
+    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
     compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
     s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
     s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -550,12 +871,12 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=98)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
     s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
     s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=98)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
     s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 512)
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
@@ -575,100 +896,431 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       #define int64_t long long
       #define uint64_t unsigned long long
     #endif
-    extern "C" __global__ void __launch_bounds__(98) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-      float conv2d_nchw[4];
-      __shared__ float pad_temp_shared[4032];
-      __shared__ float kernel_shared[1536];
+    extern "C" __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+      float conv2d_nchw[14];
+      __shared__ float pad_temp_shared[72];
+      __shared__ float kernel_shared[3072];
       conv2d_nchw[0] = 0.000000e+00f;
       conv2d_nchw[1] = 0.000000e+00f;
       conv2d_nchw[2] = 0.000000e+00f;
       conv2d_nchw[3] = 0.000000e+00f;
-      for (int rc_outer_outer = 0; rc_outer_outer < 8; ++rc_outer_outer) {
+      conv2d_nchw[4] = 0.000000e+00f;
+      conv2d_nchw[5] = 0.000000e+00f;
+      conv2d_nchw[6] = 0.000000e+00f;
+      conv2d_nchw[7] = 0.000000e+00f;
+      conv2d_nchw[8] = 0.000000e+00f;
+      conv2d_nchw[9] = 0.000000e+00f;
+      conv2d_nchw[10] = 0.000000e+00f;
+      conv2d_nchw[11] = 0.000000e+00f;
+      conv2d_nchw[12] = 0.000000e+00f;
+      conv2d_nchw[13] = 0.000000e+00f;
+      for (int rc_outer_outer = 0; rc_outer_outer < 64; ++rc_outer_outer) {
         for (int ry_outer_outer = 0; ry_outer_outer < 3; ++ry_outer_outer) {
           __syncthreads();
-          pad_temp_shared[((int)threadIdx.x)] = (((((1 <= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) && ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= (((int)threadIdx.x) % 9))) && ((((int)threadIdx.x) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 98)] = (((((1 <= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 98) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 196)] = (((((1 <= ((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 196) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 294)] = (((((1 <= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 294) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 392)] = (((((1 <= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 392) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 490)] = (((((1 <= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 490) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 588)] = (((((1 <= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 588) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 686)] = (((((1 <= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 686) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 784)] = (((((1 <= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 1) % 9))) && (((((int)threadIdx.x) + 1) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 784) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 882)] = (((((1 <= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) && ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= (((int)threadIdx.x) % 9))) && ((((int)threadIdx.x) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) + 678)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 980)] = (((((1 <= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 980) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1078)] = (((((1 <= ((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1078) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1176)] = (((((1 <= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1176) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1274)] = (((((1 <= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1274) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1372)] = (((((1 <= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1372) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1470)] = (((((1 <= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1470) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1568)] = (((((1 <= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1568) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1666)] = (((((1 <= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 1) % 9))) && (((((int)threadIdx.x) + 1) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1666) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1764)] = (((((1 <= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) && ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= (((int)threadIdx.x) % 9))) && ((((int)threadIdx.x) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) + 1364)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1862)] = (((((1 <= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1862) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 1960)] = (((((1 <= ((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1960) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 2058)] = (((((1 <= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2058) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 2156)] = (((((1 <= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2156) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 2254)] = (((((1 <= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2254) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 2352)] = (((((1 <= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2352) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 2450)] = (((((1 <= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2450) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 2548)] = (((((1 <= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 1) % 9))) && (((((int)threadIdx.x) + 1) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2548) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 2646)] = (((((1 <= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) && ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= (((int)threadIdx.x) % 9))) && ((((int)threadIdx.x) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) + 2050)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 2744)] = (((((1 <= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2744) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 2842)] = (((((1 <= ((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2842) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 2940)] = (((((1 <= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2940) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 3038)] = (((((1 <= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3038) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 3136)] = (((((1 <= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3136) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 3234)] = (((((1 <= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3234) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 3332)] = (((((1 <= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3332) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 3430)] = (((((1 <= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 1) % 9))) && (((((int)threadIdx.x) + 1) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3430) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 3528)] = (((((1 <= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) && ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= (((int)threadIdx.x) % 9))) && ((((int)threadIdx.x) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) + 2736)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 3626)] = (((((1 <= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3626) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 3724)] = (((((1 <= ((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3724) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 3822)] = (((((1 <= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3822) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 3920)] = (((((1 <= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3920) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-          if (((int)threadIdx.x) < 14) {
-            pad_temp_shared[(((int)threadIdx.x) + 4018)] = (((((((((int)threadIdx.x) + 49) / 9) + ry_outer_outer) < 8) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 4018) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) * 4) % 9))) && (((((int)threadIdx.x) * 4) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
           }
-          kernel_shared[((int)threadIdx.x)] = kernel[(((((((int)blockIdx.x) * 36864) + (rc_outer_outer * 576)) + ((((int)threadIdx.x) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 98)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 98) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) + 98) % 192) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 196)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 196) / 192) * 4608)) + (rc_outer_outer * 576)) + (((((int)threadIdx.x) + 4) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 294)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 294) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) / 3) + 34) & 63) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 392)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 392) / 192) * 4608)) + (rc_outer_outer * 576)) + (((((int)threadIdx.x) + 8) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 490)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 490) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) + 106) % 192) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 588)] = kernel[(((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 588) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((int)threadIdx.x) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36)];
-          kernel_shared[(((int)threadIdx.x) + 686)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 686) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) + 110) % 192) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 784)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 784) / 192) * 4608)) + (rc_outer_outer * 576)) + (((((int)threadIdx.x) + 16) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 882)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 882) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) / 3) + 38) & 63) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 980)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 980) / 192) * 4608)) + (rc_outer_outer * 576)) + (((((int)threadIdx.x) + 20) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1078)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 1078) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) + 118) % 192) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1176)] = kernel[(((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 1176) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((int)threadIdx.x) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 72)];
-          kernel_shared[(((int)threadIdx.x) + 1274)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 1274) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) + 122) % 192) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1372)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 1372) / 192) * 4608)) + (rc_outer_outer * 576)) + (((((int)threadIdx.x) + 28) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          if (((int)threadIdx.x) < 66) {
-            kernel_shared[(((int)threadIdx.x) + 1470)] = kernel[(((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 1470) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((int)threadIdx.x) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 378)];
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 1) % 9))) && ((((((int)threadIdx.x) * 4) + 1) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
           }
-          __syncthreads();
-          for (int rc_inner = 0; rc_inner < 64; ++rc_inner) {
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3))]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7))] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 384)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7))] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 768)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7))] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 1152)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 1)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 385)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 769)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 1153)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 2)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 386)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 770)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 1154)]));
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 2) % 9))) && ((((((int)threadIdx.x) * 4) + 2) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
           }
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 3) % 9))) && ((((((int)threadIdx.x) * 4) + 3) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
+          }
+          kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
+          kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
+          kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
+          kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
+          kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
+          kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
+          kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
+          kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
+          kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
+          kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
+          kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
+          kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
+          kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
+          kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
+          kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
+          kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          __syncthreads();
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+        }
+      }
+      for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
+        for (int i3_inner = 0; i3_inner < 7; ++i3_inner) {
+          compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
         }
       }
-      compute[((((int)blockIdx.x) * 392) + ((int)threadIdx.x))] = max((conv2d_nchw[0] + bias[((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 49))]), 0.000000e+00f);
-      compute[(((((int)blockIdx.x) * 392) + ((int)threadIdx.x)) + 98)] = max((conv2d_nchw[1] + bias[(((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 49)) + 2)]), 0.000000e+00f);
-      compute[(((((int)blockIdx.x) * 392) + ((int)threadIdx.x)) + 196)] = max((conv2d_nchw[2] + bias[(((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 49)) + 4)]), 0.000000e+00f);
-      compute[(((((int)blockIdx.x) * 392) + ((int)threadIdx.x)) + 294)] = max((conv2d_nchw[3] + bias[(((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 49)) + 6)]), 0.000000e+00f);
     }
 
 
@@ -729,7 +1381,7 @@ In the example below we resume the status and do more 5 trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 5 minutes  41.286 seconds)
+   **Total running time of the script:** ( 5 minutes  46.184 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index fe7b343ed3..d9b6618a91 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -647,7 +647,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-       7.8845       7.8834       7.8916       7.8784       0.0055   
+       7.8521       7.8516       7.8537       7.8510       0.0011   
                
 
 
@@ -675,7 +675,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  0.923 seconds)
+   **Total running time of the script:** ( 1 minutes  3.655 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_cuda.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index d1aac17678..3e54fe878b 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -666,7 +666,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      744.5041     744.4052     745.8627     743.2444      1.0712   
+      757.8213     756.5935     760.5049     756.3655      1.8999   
                
 
 
@@ -694,7 +694,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  30.702 seconds)
+   **Total running time of the script:** ( 1 minutes  33.595 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index dea7ae50a4..ff40c3e72f 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -390,25 +390,27 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
                  placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [128, 512], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [128, 512], [])}
       buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute} {
-      for (i0.outer.i1.outer.fused: int32, 0, 2048) "parallel" {
-        allocate(compute_3: Pointer(global float32), float32, [32]), storage_scope = global {
-          for (i.outer.inner: int32, 0, 2) {
+      for (i0.outer.i1.outer.fused: int32, 0, 512) "parallel" {
+        allocate(compute_3: Pointer(global float32), float32, [128]), storage_scope = global {
+          for (i.outer.inner: int32, 0, 8) {
             for (j.init: int32, 0, 16) {
-              compute_4: Buffer(compute_3, float32, [32], [])[((i.outer.inner*16) + j.init)] = 0f32
+              compute_4: Buffer(compute_3, float32, [128], [])[((i.outer.inner*16) + j.init)] = 0f32
             }
             for (elem_idx: int32, 0, let cse_var_1: int32 = floormod(i0.outer.i1.outer.fused, 32) in (placeholder_15: Buffer(placeholder_13, int32, [33], [])[(cse_var_1 + 1)] - placeholder_15[cse_var_1])) {
               for (j: int32, 0, 16) {
                 let cse_var_2: int32 = floormod(i0.outer.i1.outer.fused, 32)
                 if @tir.likely((elem_idx < (placeholder_15[(cse_var_2 + 1)] - placeholder_15[cse_var_2])), dtype=bool) {
                   let cse_var_3: int32 = ((i.outer.inner*16) + j)
-                  compute_4[cse_var_3] = (compute_4[cse_var_3] + (placeholder_16: Buffer(placeholder_11, float32, [78656], [])[(((placeholder_15[cse_var_2]*16) + (elem_idx*16)) + j)]*max(placeholder_17: Buffer(placeholder_10, float32, [32768], [])[(((floordiv(i0.outer.i1.outer.fused, 32)*512) + (i.outer.inner*256)) + placeholder_18: Buffer(placeholder_12, int32, [4916], [])[(placeholder_15[cse_var_2] + elem_idx)])], 0f32)))
+                  compute_4[cse_var_3] = (compute_4[cse_var_3] + (placeholder_16: Buffer(placeholder_11, float32, [78656], [])[(((placeholder_15[cse_var_2]*16) + (elem_idx*16)) + j)]*max(placeholder_17: Buffer(placeholder_10, float32, [32768], [])[(((floordiv(i0.outer.i1.outer.fused, 32)*2048) + (i.outer.inner*256)) + placeholder_18: Buffer(placeholder_12, int32, [4916], [])[(placeholder_15[cse_var_2] + elem_idx)])], 0f32)))
                 }
               }
             }
           }
-          for (i0.inner: int32, 0, 2) {
-            let cse_var_4: int32 = (((floordiv(i0.outer.i1.outer.fused, 32)*1024) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 32)*16))
-            compute_5: Buffer(compute_2, float32, [65536], [])[ramp(cse_var_4, 1, 16)] = max((compute_4[ramp((i0.inner*16), 1, 16)] + placeholder_19: Buffer(placeholder_14, float32, [65536], [])[ramp(cse_var_4, 1, 16)]), broadcast(0f32, 16))
+          for (i0.inner: int32, 0, 8) {
+            for (i1.inner: int32, 0, 16) {
+              let cse_var_4: int32 = ((((floordiv(i0.outer.i1.outer.fused, 32)*4096) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 32)*16)) + i1.inner)
+              compute_5: Buffer(compute_2, float32, [65536], [])[cse_var_4] = max((compute_4[((i0.inner*16) + i1.inner)] + placeholder_19: Buffer(placeholder_14, float32, [65536], [])[cse_var_4]), 0f32)
+            }
           }
         }
       }
@@ -464,7 +466,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 1.901 ms
+    Execution time of this operator: 1.916 ms
 
 
 
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index bc2bff70aa..636918b2fd 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:38.048** total execution time for **how_to_tune_with_autotvm** files:
+**00:32.700** total execution time for **how_to_tune_with_autotvm** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:38.012 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:32.662 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.021 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.022 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)             | 00:00.005 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)             | 00:00.006 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)               | 00:00.005 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index d785a067c3..3426b6cb84 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -391,9 +391,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 16, 4, 4]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6150439
-    No: 2   GFLOPS: 18.74/18.74     result: MeasureResult(costs=(0.012354673555555556,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.9697260856628418, timestamp=1673058803.3903506)       [('tile_f', [-1, 4, 1, 32]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5303507
-    No: 3   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 4, 128]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 2, 32]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,727315
+    No: 2   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -515,8 +514,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 4, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 16]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10012157
-    No: 4   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 8, 8]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 16]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9226297
+    No: 3   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -638,8 +637,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 128]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 8]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6100591
-    No: 5   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 2, 128]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4549373
+    No: 4   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -761,8 +760,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3826906
-    No: 6   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 64, 4]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 128, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7575394
+    No: 5   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -884,8 +883,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 1, 512]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 16]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9029239
-    No: 7   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 256]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 256, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7420597
+    No: 6   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1007,8 +1006,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 1, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,3183625
-    No: 8   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 2, 8]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 32, 16]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,3428846
+    No: 7   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1130,8 +1129,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 8, 64]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7371529
-    No: 9   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 256, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 256]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,3281572
+    No: 8   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1253,8 +1252,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 32, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 16]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6714222
-    No: 10  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 4, 8]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6853151
+    No: 9   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1376,8 +1375,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 2, 16]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9279551
-    No: 11  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 32, 4]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 64, 8]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2441250
+    No: 10  GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1499,26 +1498,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 2]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 64, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5284901
-    No: 12  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
-        res = future.result()
-      File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
-        return self.__get_result()
-      File "/usr/lib/python3.7/concurrent/futures/_base.py", line 384, in __get_result
-        raise self._exception
-      File "/usr/lib/python3.7/concurrent/futures/thread.py", line 57, in run
-        result = self.fn(*self.args, **self.kwargs)
-      File "/workspace/python/tvm/contrib/popen_pool.py", line 432, in <lambda>
-        worker = lambda *args: self._worker_run(*args)
-      File "/workspace/python/tvm/contrib/popen_pool.py", line 401, in _worker_run
-        return proc.recv()
-      File "/workspace/python/tvm/contrib/popen_pool.py", line 309, in recv
-        raise TimeoutError()
-    TimeoutError
-
-            [('tile_f', [-1, 16, 1, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 4]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,8797804
-    No: 13  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 16, 4, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,3432683
+    No: 11  GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1640,8 +1621,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 8, 8]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4597274
-    No: 14  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 32, 2]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2236391
+    No: 12  GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1763,8 +1744,9 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 128, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 128]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,1537190
-    No: 15  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 4, 8]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 128, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,643650
+    No: 13  GFLOPS: 9.16/9.16       result: MeasureResult(costs=(0.025283704749999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.974963188171387, timestamp=1673069124.3009748)        [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 64]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3840760
+    No: 14  GFLOPS: 0.00/9.16       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1886,8 +1868,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 2]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,5138821
-    No: 16  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 64, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10305593
+    No: 15  GFLOPS: 0.00/9.16       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -2009,9 +1991,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 2, 8]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 8, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2282863
-    No: 17  GFLOPS: 8.08/18.74      result: MeasureResult(costs=(0.02864277625,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.394887685775757, timestamp=1673058819.6129212)       [('tile_f', [-1, 2, 4, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2430456
-    No: 18  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3674941
+    No: 16  GFLOPS: 0.00/9.16       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -2133,8 +2114,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 8, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 16]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6125490
-    No: 19  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 32, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 2, 128]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4243051
+    No: 17  GFLOPS: 0.00/9.16       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -2256,8 +2237,9 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 16, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 512, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4097759
-    No: 20  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 1, 16]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,927466
+    No: 18  GFLOPS: 116.19/116.19   result: MeasureResult(costs=(0.001992447982142857,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.663283586502075, timestamp=1673069128.2002482)        [('tile_f', [-1, 8, 4, 8]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7206032
+    No: 19  GFLOPS: 0.00/116.19     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -2379,7 +2361,130 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 256]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7404097
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 2, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9307654
+    No: 20  GFLOPS: 0.00/116.19     result: Traceback (most recent call last):
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
+        func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
+        func = build(s, args, target_host=task.target_host, runtime=runtime)
+      File "/workspace/python/tvm/driver/build_module.py", line 227, in build
+        input_mod = lower(inputs, args, name=name, binds=binds)
+      File "/workspace/python/tvm/driver/build_module.py", line 134, in lower
+        return ffi.lower_schedule(inp, args, name, binds, simple_mode)
+      File "tvm/_ffi/_cython/./packed_func.pxi", line 331, in tvm._ffi._cy3.core.PackedFuncBase.__call__
+      File "tvm/_ffi/_cython/./packed_func.pxi", line 276, in tvm._ffi._cy3.core.FuncCall
+      File "tvm/_ffi/_cython/./base.pxi", line 181, in tvm._ffi._cy3.core.CHECK_CALL
+    tvm._ffi.base.TVMError: Traceback (most recent call last):
+      24: TVMFuncCall
+            at ../src/runtime/c_runtime_api.cc:477
+      23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+            at ../include/tvm/runtime/packed_func.h:1217
+      22: Call
+            at ../include/tvm/runtime/packed_func.h:1213
+      21: operator()
+            at ../include/tvm/runtime/packed_func.h:1730
+      20: unpack_call<tvm::IRModule, 5, tvm::<lambda(tvm::te::Schedule, const tvm::runtime::Array<tvm::runtime::ObjectRef>&, const tvm::runtime::String&, const tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer>&, bool)> >
+            at ../include/tvm/runtime/packed_func.h:1670
+      19: run<>
+            at ../include/tvm/runtime/packed_func.h:1630
+      18: run<tvm::runtime::TVMMovableArgValueWithContext_>
+            at ../include/tvm/runtime/packed_func.h:1630
+      17: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
+            at ../include/tvm/runtime/packed_func.h:1630
+      16: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
+            at ../include/tvm/runtime/packed_func.h:1630
+      15: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
+            at ../include/tvm/runtime/packed_func.h:1630
+      14: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
+            at ../include/tvm/runtime/packed_func.h:1645
+      13: operator()
+            at ../src/driver/driver_api.cc:395
+      12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
+            at ../src/driver/driver_api.cc:381
+      11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
+            at ../src/driver/driver_api.cc:276
+      10: tvm::transform::Pass::operator()(tvm::IRModule) const
+            at ../src/ir/transform.cc:258
+      9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
+            at ../src/ir/transform.cc:274
+      8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
+            at ../src/ir/transform.cc:454
+      7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
+            at ../src/ir/transform.cc:274
+      6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
+            at ../src/tir/ir/transform.cc:100
+      5: tvm::runtime::TypedPackedFunc<tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)>::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
+            at ../include/tvm/runtime/packed_func.h:1749
+      4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher<tvm::tir::PrimFunc>::run<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::runtime::PackedFunc const&, tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&)
+            at ../include/tvm/runtime/packed_func.h:1693
+      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&) const
+            at ../include/tvm/runtime/packed_func.h:1617
+      2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+            at ../include/tvm/runtime/packed_func.h:1217
+      1: Call
+            at ../include/tvm/runtime/packed_func.h:1213
+      0: operator()
+            at ../src/runtime/c_runtime_api.cc:534
+      File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
+        raise InstantiationError("Skipped because of invalid gpu kernel")
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
+
+    Traceback (most recent call last):
+      24: TVMFuncCall
+            at ../src/runtime/c_runtime_api.cc:477
+      23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+            at ../include/tvm/runtime/packed_func.h:1217
+      22: Call
+            at ../include/tvm/runtime/packed_func.h:1213
+      21: operator()
+            at ../include/tvm/runtime/packed_func.h:1730
+      20: unpack_call<tvm::IRModule, 5, tvm::<lambda(tvm::te::Schedule, const tvm::runtime::Array<tvm::runtime::ObjectRef>&, const tvm::runtime::String&, const tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer>&, bool)> >
+            at ../include/tvm/runtime/packed_func.h:1670
+      19: run<>
+            at ../include/tvm/runtime/packed_func.h:1630
+      18: run<tvm::runtime::TVMMovableArgValueWithContext_>
+            at ../include/tvm/runtime/packed_func.h:1630
+      17: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
+            at ../include/tvm/runtime/packed_func.h:1630
+      16: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
+            at ../include/tvm/runtime/packed_func.h:1630
+      15: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
+            at ../include/tvm/runtime/packed_func.h:1630
+      14: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
+            at ../include/tvm/runtime/packed_func.h:1645
+      13: operator()
+            at ../src/driver/driver_api.cc:395
+      12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
+            at ../src/driver/driver_api.cc:381
+      11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
+            at ../src/driver/driver_api.cc:276
+      10: tvm::transform::Pass::operator()(tvm::IRModule) const
+            at ../src/ir/transform.cc:258
+      9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
+            at ../src/ir/transform.cc:274
+      8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
+            at ../src/ir/transform.cc:454
+      7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
+            at ../src/ir/transform.cc:274
+      6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
+            at ../src/tir/ir/transform.cc:100
+      5: tvm::runtime::TypedPackedFunc<tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)>::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
+            at ../include/tvm/runtime/packed_func.h:1749
+      4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher<tvm::tir::PrimFunc>::run<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::runtime::PackedFunc const&, tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&)
+            at ../include/tvm/runtime/packed_func.h:1693
+      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&) const
+            at ../include/tvm/runtime/packed_func.h:1617
+      2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+            at ../include/tvm/runtime/packed_func.h:1217
+      1: Call
+            at ../include/tvm/runtime/packed_func.h:1213
+      0: operator()
+            at ../src/runtime/c_runtime_api.cc:534
+      File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
+        raise InstantiationError("Skipped because of invalid gpu kernel")
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 8, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 256, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6417359
 
 
 
@@ -2434,9 +2539,9 @@ and measure running time.
     Finish loading 20 records
 
     Best config:
-    [('tile_f', [-1, 4, 1, 32]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5303507
+    [('tile_f', [-1, 8, 4, 8]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7206032
     Finish loading 20 records
-    Time cost of this operator: 0.011815
+    Time cost of this operator: 0.002243
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index ddfb05e6d6..ed940486be 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -368,10 +368,10 @@ Timing the untuned program
     ########## Build without Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  312.1     98.695   (1, 2, 10, 10, 3)  2       1        [312.1]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.148     0.995    (1, 6, 10, 10)     1       1        [3.148]           
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.978     0.309    (1, 1, 10, 10, 3)  1       1        [0.978]           
-    Total_time                                    -                                             316.226   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  339.1     98.737   (1, 2, 10, 10, 3)  2       1        [339.1]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.341     0.973    (1, 6, 10, 10)     1       1        [3.341]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.998     0.291    (1, 1, 10, 10, 3)  1       1        [0.998]           
+    Total_time                                    -                                             343.439   -        -                  -       -        -                 
 
 
 
@@ -436,10 +436,10 @@ Timing the tuned program
     ########## Build with Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  103.3     97.519   (1, 6, 10, 10, 1)  2       1        [103.3]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.775     1.676    (1, 6, 10, 10)     1       1        [1.775]           
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.853     0.805    (1, 3, 10, 10, 1)  1       1        [0.853]           
-    Total_time                                    -                                             105.928   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  102.8     97.478   (1, 6, 10, 10, 1)  2       1        [102.8]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.806     1.712    (1, 6, 10, 10)     1       1        [1.806]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.854     0.81     (1, 3, 10, 10, 1)  1       1        [0.854]           
+    Total_time                                    -                                             105.459   -        -                  -       -        -                 
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt
index 616f354c90..38bdb73ef4 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt
@@ -117,7 +117,7 @@ download a cat image and preprocess it to use as the model input.
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torch/ao/quantization/utils.py:281: UserWarning: must run observer before calling calculate_qparams. Returning default values.
       "must run observer before calling calculate_qparams. " +
     Downloading: "https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2_qnnpack_37f702c5.pth
-
      0%|          | 0.00/3.42M [00:00<?, ?B/s]
    100%|##########| 3.42M/3.42M [00:00<00:00, 153MB/s]
+
      0%|          | 0.00/3.42M [00:00<?, ?B/s]
    100%|##########| 3.42M/3.42M [00:00<00:00, 231MB/s]
     /workspace/python/tvm/relay/frontend/pytorch_utils.py:47: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
       return LooseVersion(torch_ver) > ver
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/setuptools/_distutils/version.py:346: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
@@ -322,7 +322,7 @@ Look up prediction top 1 index in 1000 class synset.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  1.972 seconds)
+   **Total running time of the script:** ( 1 minutes  8.786 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_pytorch.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
index 9c91db6b9b..c346579b5b 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
@@ -218,7 +218,7 @@ take about **2 minutes** to download the Stanford Cars, while COCO 2017 validati
  .. code-block:: none
 
 
-    '/tmp/tmpa6k21yod/images/random'
+    '/tmp/tmp5h8wk54q/images/random'
 
 
 
@@ -309,7 +309,7 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
 
 .. image-sg:: /how_to/work_with_microtvm/images/sphx_glr_micro_train_001.png
-   :alt: [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]
+   :alt: [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]
    :srcset: /how_to/work_with_microtvm/images/sphx_glr_micro_train_001.png
    :class: sphx-glr-single-img
 
@@ -318,8 +318,8 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
  .. code-block:: none
 
-    /tmp/tmpa6k21yod/images/target contains 8144 images
-    /tmp/tmpa6k21yod/images/random contains 5000 images
+    /tmp/tmp5h8wk54q/images/target contains 8144 images
+    /tmp/tmp5h8wk54q/images/random contains 5000 images
 
 
 
@@ -494,13 +494,13 @@ the time on our validation set).
  .. code-block:: none
 
     Epoch 1/3
-    328/328 - 47s - loss: 0.2259 - accuracy: 0.9216 - val_loss: 0.1162 - val_accuracy: 0.9562 - 47s/epoch - 142ms/step
+    328/328 - 48s - loss: 0.2220 - accuracy: 0.9247 - val_loss: 0.1724 - val_accuracy: 0.9335 - 48s/epoch - 146ms/step
     Epoch 2/3
-    328/328 - 43s - loss: 0.0972 - accuracy: 0.9643 - val_loss: 0.1257 - val_accuracy: 0.9600 - 43s/epoch - 130ms/step
+    328/328 - 44s - loss: 0.0987 - accuracy: 0.9647 - val_loss: 0.1322 - val_accuracy: 0.9486 - 44s/epoch - 136ms/step
     Epoch 3/3
-    328/328 - 43s - loss: 0.0628 - accuracy: 0.9772 - val_loss: 0.1478 - val_accuracy: 0.9517 - 43s/epoch - 131ms/step
+    328/328 - 45s - loss: 0.0816 - accuracy: 0.9699 - val_loss: 0.0834 - val_accuracy: 0.9698 - 45s/epoch - 136ms/step
 
-    <keras.callbacks.History object at 0x7f243f09af10>
+    <keras.callbacks.History object at 0x7fb9bd706390>
 
 
 
@@ -857,7 +857,7 @@ Arduino tutorial for how to do that `on GitHub <https://github.com/guberti/tvm-a
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 4 minutes  12.686 seconds)
+   **Total running time of the script:** ( 5 minutes  1.296 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_train.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index 771c4bc60f..b1455b4b5d 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,20 +5,20 @@
 
 Computation times
 =================
-**06:16.992** total execution time for **how_to_work_with_microtvm** files:
+**07:15.519** total execution time for **how_to_work_with_microtvm** files:
 
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 04:12.686 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 05:01.296 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_pytorch.py` (``micro_pytorch.py``)           | 01:01.972 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_pytorch.py` (``micro_pytorch.py``)           | 01:08.786 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:50.713 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:53.248 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_aot.py` (``micro_aot.py``)                   | 00:07.872 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_aot.py` (``micro_aot.py``)                   | 00:08.145 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.747 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:04.042 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_reference_vm.py` (``micro_reference_vm.py``) | 00:00.001 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_reference_vm.py` (``micro_reference_vm.py``) | 00:00.002 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)             | 00:00.001 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index 3a22f39f0f..e3d445ace1 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:43.885** total execution time for **how_to_work_with_relay** files:
+**00:45.645** total execution time for **how_to_work_with_relay** files:
 
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_pipeline_executor.py` (``using_pipeline_executor.py``) | 00:32.031 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_pipeline_executor.py` (``using_pipeline_executor.py``) | 00:33.432 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)           | 00:10.205 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)           | 00:10.569 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                             | 00:01.642 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                             | 00:01.637 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)                 | 00:00.007 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
index ef0ec83ded..98de57d4d0 100644
--- a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
@@ -265,7 +265,7 @@ The following example customizes CUDA lowering rule for :code:`exp`.
  .. code-block:: none
 
 
-    <function my_cuda_math_rule at 0x7f246f70cb00>
+    <function my_cuda_math_rule at 0x7fb9b89bc9e0>
 
 
 
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index 612a96d4bb..3eec8541df 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,22 +5,22 @@
 
 Computation times
 =================
-**00:06.869** total execution time for **how_to_work_with_schedules** files:
+**00:07.617** total execution time for **how_to_work_with_schedules** files:
 
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:04.329 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:04.987 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:01.197 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:01.226 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.574 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.593 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.555 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.579 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.113 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.126 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.049 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.029 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.032 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)               | 00:00.024 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)               | 00:00.026 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index e050775cea..702783dd88 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -347,7 +347,7 @@ The importing needs to happen before the tensorized GEMV being executed.
                  B: Buffer(B_2: Pointer(float32), float32, [512, 64], []),
                  C: Buffer(C_2: Pointer(float32), float32, [1024, 512], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpzylmsdtf/input0.cc'\nsource_filename = \"/tmp/tmpzylmsdtf/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpsit_0zwl/input0.cc'\nsource_filename = \"/tmp/tmpsit_0zwl/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
       for (i, 0, 1024) {
         for (j.outer: int32, 0, 32) {
           @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index bda96b93ed..d6fc912b91 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:25.653** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:27.545** total execution time for **topic_vta_tutorials_autotvm** files:
 
 +---------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:25.647 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:27.539 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)     | 00:00.006 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)     | 00:00.007 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index 908591be6b..d823da9bcf 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -293,7 +293,7 @@ The compilation steps are:
       DeprecationWarning,
     /workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the  new recommended usage.
       relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
-    resnet18_v1 inference graph built in 28.13s!
+    resnet18_v1 inference graph built in 31.28s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index 4564bd9670..a88cf3112c 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -337,7 +337,7 @@ The compilation steps are:
 
     /workspace/python/tvm/relay/build_module.py:348: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
       DeprecationWarning,
-    yolov3-tiny inference graph built in 19.16s!
+    yolov3-tiny inference graph built in 19.46s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index d4bff513b3..1a51b2dded 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**01:30.679** total execution time for **topic_vta_tutorials_frontend** files:
+**01:37.705** total execution time for **topic_vta_tutorials_frontend** files:
 
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:45.819 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:49.505 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:44.860 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:48.200 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index 0414519bb2..0bbacded71 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:03.107** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.566** total execution time for **topic_vta_tutorials_optimize** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.654 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:03.062 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.453 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.504 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index f57d5c05c6..c46311ae6e 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:00.784** total execution time for **topic_vta_tutorials** files:
+**00:00.851** total execution time for **topic_vta_tutorials** files:
 
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.417 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.435 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.367 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.415 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index 9431079078..93a9a11422 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -207,13 +207,6 @@ trials, we can load the best schedule from the log file and apply it.
 
 
 
-.. rst-class:: sphx-glr-script-out
-
- .. code-block:: none
-
-    *E
-
-
 
 
 
@@ -336,7 +329,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 96.043 ms
+    Execution time of this operator: 95.133 ms
 
 
 
@@ -436,7 +429,7 @@ resume the status and do more 5 trials.
     Resume search:
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/xgboost/training.py:17: UserWarning: Old style callback is deprecated.  See: https://xgboost.readthedocs.io/en/latest/python/callbacks.html
       warnings.warn(f'Old style callback is deprecated.  See: {link}', UserWarning)
-    *E
+
 
 
 
@@ -454,7 +447,7 @@ operations.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  37.247 seconds)
+   **Total running time of the script:** ( 1 minutes  17.955 seconds)
 
 
 .. _sphx_glr_download_tutorial_auto_scheduler_matmul_x86.py:
diff --git a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
index 9dc328ac6e..1371715cb1 100644
--- a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
@@ -454,16 +454,16 @@ reduce variance, we take 5 measurements and average them.
     waiting for device...
     device available
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 7.27/7.27       result: MeasureResult(costs=(0.0369452094,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.8189527988433838, timestamp=1673057385.5661008)       [('tile_y', [-1, 1]), ('tile_x', [-1, 32])],None,50
-    No: 2   GFLOPS: 2.54/7.27       result: MeasureResult(costs=(0.10563635759999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.9483799934387207, timestamp=1673057388.2540984)        [('tile_y', [-1, 2]), ('tile_x', [-1, 16])],None,41
-    No: 3   GFLOPS: 3.94/7.27       result: MeasureResult(costs=(0.068164846,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3446650505065918, timestamp=1673057389.5958967)        [('tile_y', [-1, 64]), ('tile_x', [-1, 16])],None,46
-    No: 4   GFLOPS: 12.99/12.99     result: MeasureResult(costs=(0.0206603994,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5796749591827393, timestamp=1673057390.9315107)       [('tile_y', [-1, 64]), ('tile_x', [-1, 512])],None,96
-    No: 5   GFLOPS: 3.28/12.99      result: MeasureResult(costs=(0.0819551278,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.561246395111084, timestamp=1673057392.672882) [('tile_y', [-1, 64]), ('tile_x', [-1, 8])],None,36
-    No: 6   GFLOPS: 3.23/12.99      result: MeasureResult(costs=(0.0831159276,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.5645718574523926, timestamp=1673057394.2465796)       [('tile_y', [-1, 8]), ('tile_x', [-1, 8])],None,33
-    No: 7   GFLOPS: 2.64/12.99      result: MeasureResult(costs=(0.10162363779999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8636929988861084, timestamp=1673057396.8698485)        [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
-    No: 8   GFLOPS: 11.85/12.99     result: MeasureResult(costs=(0.0226466734,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6712026596069336, timestamp=1673057397.4798148)       [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
-    No: 9   GFLOPS: 2.81/12.99      result: MeasureResult(costs=(0.095573849,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.726302146911621, timestamp=1673057399.3290334) [('tile_y', [-1, 16]), ('tile_x', [-1, 4])],None,24
-    No: 10  GFLOPS: 3.70/12.99      result: MeasureResult(costs=(0.0725857672,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3688905239105225, timestamp=1673057400.7390091)       [('tile_y', [-1, 128]), ('tile_x', [-1, 16])],None,47
+    No: 1   GFLOPS: 10.12/10.12     result: MeasureResult(costs=(0.0265268426,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6952898502349854, timestamp=1673067663.8775885)       [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
+    No: 2   GFLOPS: 2.24/10.12      result: MeasureResult(costs=(0.11978428599999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.1533713340759277, timestamp=1673067666.04934)  [('tile_y', [-1, 512]), ('tile_x', [-1, 8])],None,39
+    No: 3   GFLOPS: 0.87/10.12      result: MeasureResult(costs=(0.30788740579999996,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.158709287643433, timestamp=1673067672.070778)  [('tile_y', [-1, 32]), ('tile_x', [-1, 2])],None,15
+    No: 4   GFLOPS: 3.11/10.12      result: MeasureResult(costs=(0.0862554158,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.5931015014648438, timestamp=1673067674.4703805)       [('tile_y', [-1, 256]), ('tile_x', [-1, 8])],None,38
+    No: 5   GFLOPS: 8.42/10.12      result: MeasureResult(costs=(0.0318956848,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.7621068954467773, timestamp=1673067675.6581385)       [('tile_y', [-1, 4]), ('tile_x', [-1, 32])],None,52
+    No: 6   GFLOPS: 9.88/10.12      result: MeasureResult(costs=(0.027176364999999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.9935829639434814, timestamp=1673067676.3468971)       [('tile_y', [-1, 16]), ('tile_x', [-1, 128])],None,74
+    No: 7   GFLOPS: 13.28/13.28     result: MeasureResult(costs=(0.020212576000000003,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.603386640548706, timestamp=1673067677.6897607)        [('tile_y', [-1, 128]), ('tile_x', [-1, 64])],None,67
+    No: 8   GFLOPS: 3.89/13.28      result: MeasureResult(costs=(0.06895605219999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.357858419418335, timestamp=1673067679.0462306) [('tile_y', [-1, 4]), ('tile_x', [-1, 16])],None,42
+    No: 9   GFLOPS: 2.09/13.28      result: MeasureResult(costs=(0.128442294,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.2716591358184814, timestamp=1673067681.430947) [('tile_y', [-1, 128]), ('tile_x', [-1, 4])],None,27
+    No: 10  GFLOPS: 0.50/13.28      result: MeasureResult(costs=(0.5333299123999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=8.740058898925781, timestamp=1673067690.2128575)  [('tile_y', [-1, 64]), ('tile_x', [-1, 1])],None,6
 
 
 
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index d12f275a36..3e0018d91a 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -324,7 +324,7 @@ standard deviation.
 
  .. code-block:: none
 
-    {'mean': 509.8624551199999, 'median': 509.88398250000273, 'std': 1.470863428935131}
+    {'mean': 511.40193933999853, 'median': 511.7527159499957, 'std': 1.7348229582885892}
 
 
 
@@ -558,30 +558,29 @@ the tuning data to.
 
  .. code-block:: none
 
-
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:    3.39/  23.48 GFLOPS | Progress: (4/20) | 8.57 s
    [Task  1/25]  Current/Best:   14.47/  23.48 GFLOPS | Progress: (8/20) | 11.46 s
    [Task  1/25]  Current/Best:   16.35/  23.48 GFLOPS | Progress: (12/20) | 14.04 s
    [Task  1/25]  Current/Best:   19.23/  23.48 GFLOPS | Progress: (16/20) | 16.47 s
    [Task  1/25]  Current/Best:   14.63/  23.48 GFLOPS | Progress: (20/20) | 19.20 s Done.
-
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.44/  18.80 GFLOPS | Progress: (4/20) | 4.05 s
    [Task  2/25]  Current/Best:   13.19/  18.80 GFLOPS | Progress: (8/20) | 5.63 s
    [Task  2/25]  Current/Best:   11.24/  18.80 GFLOPS | Progress: (12/20) | 7.49 s
    [Task  2/25]  Current/Best:   16.10/  21.82 GFLOPS | Progress: (16/20) | 9.31 s
    [Task  2/25]  Current/Best:    7.87/  21.82 GFLOPS | Progress: (20/20) | 11.33 s Done.
-
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:   17.85/  17.85 GFLOPS | Progress: (4/20) | 3.72 s
    [Task  3/25]  Current/Best:   14.33/  17.85 GFLOPS | Progress: (8/20) | 7.14 s
    [Task  3/25]  Current/Best:   16.93/  19.19 GFLOPS | Progress: (12/20) | 9.57 s
    [Task  3/25]  Current/Best:   23.79/  23.79 GFLOPS | Progress: (16/20) | 12.14 s
    [Task  3/25]  Current/Best:    8.33/  23.79 GFLOPS | Progress: (20/20) | 14.72 s Done.
-
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    6.39/  16.33 GFLOPS | Progress: (4/20) | 7.12 s
    [Task  4/25]  Current/Best:   18.37/  18.37 GFLOPS | Progress: (8/20) | 11.37 s
    [Task  4/25]  Current/Best:   16.69/  18.37 GFLOPS | Progress: (12/20) | 13.68 s
    [Task  4/25]  Current/Best:   16.99/  19.41 GFLOPS | Progress: (16/20) | 15.88 s
    [Task  4/25]  Current/Best:   11.47/  21.32 GFLOPS | Progress: (20/20) | 17.82 s Done.
-
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    5.12/  11.15 GFLOPS | Progress: (4/20) | 4.22 s
    [Task  5/25]  Current/Best:   20.14/  20.14 GFLOPS | Progress: (8/20) | 6.19 s
    [Task  5/25]  Current/Best:   19.56/  20.14 GFLOPS | Progress: (12/20) | 8.48 s
    [Task  5/25]  Current/Best:   12.89/  20.14 GFLOPS | Progress: (16/20) | 10.81 s
    [Task  5/25]  Current/Best:    7.79/  21.00 GFLOPS | Progress: (20/20) | 12.76 s Done.
-
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   15.14/  17.06 GFLOPS | Progress: (4/20) | 4.84 s
    [Task  6/25]  Current/Best:    9.70/  22.19 GFLOPS | Progress: (8/20) | 8.32 s
    [Task  6/25]  Current/Best:   11.85/  22.19 GFLOPS | Progress: (12/20) | 10.69 s
    [Task  6/25]  Current/Best:    8.77/  22.19 GFLOPS | Progress: (16/20) | 13.98 s
    [Task  6/25]  Current/Best:    4.06/  22.19 GFLOPS | Progress: (20/20) | 17.81 s Done.
-
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.88/  19.99 GFLOPS | Progress: (4/20) | 3.98 s
    [Task  7/25]  Current/Best:   15.53/  19.99 GFLOPS | Progress: (8/20) | 6.44 s
    [Task  7/25]  Current/Best:   15.03/  22.63 GFLOPS | Progress: (12/20) | 8.50 s
    [Task  7/25]  Current/Best:    8.33/  22.63 GFLOPS | Progress: (16/20) | 11.03 s
    [Task  7/25]  Current/Best:   12.29/  22.63 GFLOPS | Progress: (20/20) | 15.06 s Done.
-
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:    5.45/  20.51 GFLOPS | Progress: (4/20) | 9.22 s
    [Task  8/25]  Current/Best:    9.01/  20.51 GFLOPS | Progress: (8/20) | 16.13 s
    [Task  8/25]  Current/Best:   20.57/  20.57 GFLOPS | Progress: (12/20) | 20.30 s
    [Task  8/25]  Current/Best:   11.45/  20.57 GFLOPS | Progress: (16/20) | 23.76 s
    [Task  8/25]  Current/Best:   11.88/  20.57 GFLOPS | Progress: (20/20) | 30.96 s Done.
-
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   13.94/  20.84 GFLOPS | Progress: (4/20) | 3.47 s
    [Task  9/25]  Current/Best:    6.78/  20.84 GFLOPS | Progress: (8/20) | 12.09 s
    [Task  9/25]  Current/Best:   12.84/  20.84 GFLOPS | Progress: (12/20) | 14.39 s
    [Task  9/25]  Current/Best:   12.27/  20.84 GFLOPS | Progress: (16/20) | 17.72 s
    [Task  9/25]  Current/Best:   13.71/  20.84 GFLOPS | Progress: (20/20) | 19.86 s Done.
-
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   20.21/  20.21 GFLOPS | Progress: (4/20) | 3.56 s
    [Task 10/25]  Current/Best:   19.13/  20.21 GFLOPS | Progress: (8/20) | 5.39 s
    [Task 10/25]  Current/Best:   12.31/  20.21 GFLOPS | Progress: (12/20) | 7.32 s
    [Task 10/25]  Current/Best:   18.00/  20.21 GFLOPS | Progress: (16/20) | 8.90 s
    [Task 10/25]  Current/Best:   14.41/  20.21 GFLOPS | Progress: (20/20) | 11.05 s Done.
-
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:    9.43/  21.74 GFLOPS | Progress: (4/20) | 4.07 s
    [Task 11/25]  Current/Best:   22.13/  22.13 GFLOPS | Progress: (8/20) | 6.30 s
    [Task 11/25]  Current/Best:    6.80/  22.13 GFLOPS | Progress: (12/20) | 8.97 s
    [Task 11/25]  Current/Best:    6.18/  22.13 GFLOPS | Progress: (16/20) | 11.35 s
    [Task 11/25]  Current/Best:   21.74/  22.13 GFLOPS | Progress: (20/20) | 13.87 s Done.
-
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:   17.82/  17.82 GFLOPS | Progress: (4/20) | 4.21 s
    [Task 12/25]  Current/Best:    5.37/  17.82 GFLOPS | Progress: (8/20) | 7.03 s
    [Task 12/25]  Current/Best:    6.42/  17.82 GFLOPS | Progress: (12/20) | 10.61 s
    [Task 12/25]  Current/Best:    8.66/  17.82 GFLOPS | Progress: (16/20) | 14.17 s
    [Task 12/25]  Current/Best:    3.39/  17.82 GFLOPS | Progress: (20/20) | 16.99 s Done.
-
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:   17.05/  19.37 GFLOPS | Progress: (4/20) | 5.15 s
    [Task 13/25]  Current/Best:   12.05/  19.37 GFLOPS | Progress: (8/20) | 7.40 s
    [Task 13/25]  Current/Best:    6.52/  19.37 GFLOPS | Progress: (12/20) | 11.22 s
    [Task 13/25]  Current/Best:    9.16/  23.28 GFLOPS | Progress: (16/20) | 14.72 s
    [Task 13/25]  Current/Best:   23.21/  23.28 GFLOPS | Progress: (20/20) | 18.62 s Done.
-
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   17.63/  17.63 GFLOPS | Progress: (4/20) | 3.54 s
    [Task 14/25]  Current/Best:   22.81/  22.81 GFLOPS | Progress: (8/20) | 7.26 s
    [Task 14/25]  Current/Best:   18.55/  22.81 GFLOPS | Progress: (12/20) | 10.65 s
    [Task 14/25]  Current/Best:   16.46/  22.81 GFLOPS | Progress: (16/20) | 13.51 s
    [Task 14/25]  Current/Best:   12.52/  22.81 GFLOPS | Progress: (20/20) | 17.06 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
-
    [Task 15/25]  Current/Best:   10.96/  15.83 GFLOPS | Progress: (4/20) | 4.19 s
    [Task 15/25]  Current/Best:   17.33/  17.33 GFLOPS | Progress: (8/20) | 6.45 s
    [Task 15/25]  Current/Best:    6.43/  17.33 GFLOPS | Progress: (12/20) | 8.82 s
    [Task 15/25]  Current/Best:    6.57/  17.33 GFLOPS | Progress: (16/20) | 11.89 s
    [Task 15/25]  Current/Best:    3.21/  17.33 GFLOPS | Progress: (20/20) | 15.94 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   18.48/  18.48 GFLOPS | Progress: (4/20) | 5.07 s
    [Task 16/25]  Current/Best:   10.61/  18.48 GFLOPS | Progress: (8/20) | 8.55 s
    [Task 16/25]  Current/Best:   16.13/  18.48 GFLOPS | Progress: (12/20) | 11.80 s
    [Task 16/25]  Current/Best:   10.55/  18.48 GFLOPS | Progress: (16/20) | 14.24 s
    [Task 16/25]  Current/Best:   14.89/  18.88 GFLOPS | Progress: (20/20) | 15.75 s Done.
-
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   18.78/  22.71 GFLOPS | Progress: (4/20) | 4.65 s
    [Task 17/25]  Current/Best:   13.67/  22.71 GFLOPS | Progress: (8/20) | 7.31 s
    [Task 17/25]  Current/Best:   15.38/  22.71 GFLOPS | Progress: (12/20) | 12.75 s
    [Task 17/25]  Current/Best:   23.10/  23.10 GFLOPS | Progress: (16/20) | 15.05 s
    [Task 17/25]  Current/Best:   11.73/  23.10 GFLOPS | Progress: (20/20) | 18.27 s Done.
-
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:    5.73/  17.32 GFLOPS | Progress: (4/20) | 4.28 s
    [Task 18/25]  Current/Best:    5.65/  17.32 GFLOPS | Progress: (8/20) | 7.62 s
    [Task 18/25]  Current/Best:   13.22/  17.32 GFLOPS | Progress: (12/20) | 10.47 s
    [Task 18/25]  Current/Best:   18.91/  18.91 GFLOPS | Progress: (16/20) | 16.89 s
    [Task 18/25]  Current/Best:   10.98/  21.34 GFLOPS | Progress: (20/20) | 25.36 s Done.
-
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    9.70/  19.30 GFLOPS | Progress: (4/20) | 5.06 s
    [Task 19/25]  Current/Best:   19.27/  19.30 GFLOPS | Progress: (8/20) | 10.10 s
    [Task 19/25]  Current/Best:   19.84/  19.84 GFLOPS | Progress: (12/20) | 13.62 s
    [Task 19/25]  Current/Best:    8.95/  19.84 GFLOPS | Progress: (16/20) | 16.35 s
    [Task 19/25]  Current/Best:    3.09/  19.84 GFLOPS | Progress: (20/20) | 19.47 s Done.
-
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:   20.38/  20.38 GFLOPS | Progress: (4/20) | 3.64 s
    [Task 20/25]  Current/Best:   14.25/  20.38 GFLOPS | Progress: (8/20) | 7.43 s
    [Task 20/25]  Current/Best:   19.47/  20.38 GFLOPS | Progress: (12/20) | 10.03 s
    [Task 20/25]  Current/Best:   15.41/  20.38 GFLOPS | Progress: (16/20) | 12.69 s
    [Task 20/25]  Current/Best:   16.19/  20.38 GFLOPS | Progress: (20/20) | 15.63 s Done.
-
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    9.86/  14.38 GFLOPS | Progress: (4/20) | 3.46 s
    [Task 21/25]  Current/Best:    5.36/  14.38 GFLOPS | Progress: (8/20) | 6.60 s
    [Task 21/25]  Current/Best:   17.60/  17.60 GFLOPS | Progress: (12/20) | 12.32 s
    [Task 21/25]  Current/Best:    8.90/  17.60 GFLOPS | Progress: (16/20) | 14.57 s
    [Task 21/25]  Current/Best:    5.24/  17.60 GFLOPS | Progress: (20/20) | 16.57 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:   10.02/  16.26 GFLOPS | Progress: (4/20) | 3.47 s
    [Task 22/25]  Current/Best:   18.42/  18.42 GFLOPS | Progress: (8/20) | 5.51 s
    [Task 22/25]  Current/Best:   10.73/  18.42 GFLOPS | Progress: (12/20) | 7.14 s
    [Task 22/25]  Current/Best:   18.72/  18.72 GFLOPS | Progress: (16/20) | 8.96 s
    [Task 22/25]  Current/Best:    5.36/  20.90 GFLOPS | Progress: (20/20) 
 | 11.16 s Done.
-
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:    9.30/  10.05 GFLOPS | Progress: (4/20) | 6.24 s
    [Task 23/25]  Current/Best:   16.43/  16.43 GFLOPS | Progress: (8/20) | 10.41 s
    [Task 23/25]  Current/Best:   16.71/  23.63 GFLOPS | Progress: (12/20) | 13.55 s
    [Task 23/25]  Current/Best:    1.55/  23.63 GFLOPS | Progress: (16/20) | 17.89 s
    [Task 23/25]  Current/Best:   15.22/  23.63 GFLOPS | Progress: (20/20) | 20.66 s Done.
-
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    2.88/   6.64 GFLOPS | Progress: (4/20) | 5.22 s
    [Task 24/25]  Current/Best:    9.86/   9.86 GFLOPS | Progress: (8/20) | 6.61 s
    [Task 24/25]  Current/Best:    3.58/  10.31 GFLOPS | Progress: (12/20) | 17.54 s
    [Task 24/25]  Current/Best:    5.97/  10.31 GFLOPS | Progress: (16/20) | 28.18 s
    [Task 24/25]  Current/Best:    3.07/  10.31 GFLOPS | Progress: (20/20) | 38.81 s
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
-     Done.
-
    [Task 25/25]  Current/Best:   10.08/  10.08 GFLOPS | Progress: (4/20) | 13.46 s
    [Task 25/25]  Current/Best:    3.04/  10.08 GFLOPS | Progress: (8/20) | 15.48 s
    [Task 25/25]  Current/Best:    1.55/  10.08 GFLOPS | Progress: (12/20) | 18.45 s
    [Task 25/25]  Current/Best:    3.04/  10.08 GFLOPS | Progress: (16/20) | 28.82 s
    [Task 25/25]  Current/Best:    3.34/  10.08 GFLOPS | Progress: (20/20) | 38.73 s
+
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:    8.45/  20.56 GFLOPS | Progress: (4/20) | 9.42 s
    [Task  1/25]  Current/Best:    7.55/  20.56 GFLOPS | Progress: (8/20) | 13.26 s
    [Task  1/25]  Current/Best:   11.31/  20.56 GFLOPS | Progress: (12/20) | 16.86 s
    [Task  1/25]  Current/Best:    6.40/  20.56 GFLOPS | Progress: (16/20) | 20.62 s
    [Task  1/25]  Current/Best:    6.25/  20.56 GFLOPS | Progress: (20/20) | 23.16 s Done.
+
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:    5.43/  16.90 GFLOPS | Progress: (4/20) | 3.27 s
    [Task  2/25]  Current/Best:    6.00/  16.90 GFLOPS | Progress: (8/20) | 4.85 s
    [Task  2/25]  Current/Best:   17.82/  17.82 GFLOPS | Progress: (12/20) | 6.17 s
    [Task  2/25]  Current/Best:    7.22/  17.82 GFLOPS | Progress: (16/20) | 8.03 s
    [Task  2/25]  Current/Best:   17.10/  17.82 GFLOPS | Progress: (20/20) | 9.97 s Done.
+
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:   20.88/  20.88 GFLOPS | Progress: (4/20) | 4.26 s
    [Task  3/25]  Current/Best:    5.77/  21.69 GFLOPS | Progress: (8/20) | 6.94 s
    [Task  3/25]  Current/Best:   12.83/  21.69 GFLOPS | Progress: (12/20) | 10.33 s
    [Task  3/25]  Current/Best:    5.65/  21.69 GFLOPS | Progress: (16/20) | 13.05 s
    [Task  3/25]  Current/Best:    5.07/  21.69 GFLOPS | Progress: (20/20) | 15.90 s Done.
+
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:   13.29/  19.68 GFLOPS | Progress: (4/20) | 4.36 s
    [Task  4/25]  Current/Best:   11.68/  19.68 GFLOPS | Progress: (8/20) | 7.29 s
    [Task  4/25]  Current/Best:   12.04/  19.68 GFLOPS | Progress: (12/20) | 9.96 s
    [Task  4/25]  Current/Best:   17.12/  19.68 GFLOPS | Progress: (16/20) | 11.81 s
    [Task  4/25]  Current/Best:   10.22/  20.15 GFLOPS | Progress: (20/20) | 13.66 s Done.
+
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:   17.47/  17.47 GFLOPS | Progress: (4/20) | 3.64 s
    [Task  5/25]  Current/Best:    8.82/  19.33 GFLOPS | Progress: (8/20) | 5.62 s
    [Task  5/25]  Current/Best:   11.00/  19.33 GFLOPS | Progress: (12/20) | 7.80 s
    [Task  5/25]  Current/Best:   18.11/  19.33 GFLOPS | Progress: (16/20) | 10.21 s
    [Task  5/25]  Current/Best:   18.21/  19.33 GFLOPS | Progress: (20/20) | 12.25 s Done.
+
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   10.38/  17.83 GFLOPS | Progress: (4/20) | 4.48 s
    [Task  6/25]  Current/Best:   11.86/  17.83 GFLOPS | Progress: (8/20) | 8.16 s
    [Task  6/25]  Current/Best:   10.32/  17.83 GFLOPS | Progress: (12/20) | 10.67 s
    [Task  6/25]  Current/Best:    5.93/  18.10 GFLOPS | Progress: (16/20) | 14.16 s
    [Task  6/25]  Current/Best:    7.48/  18.10 GFLOPS | Progress: (20/20) | 18.00 s Done.
+
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.94/  18.19 GFLOPS | Progress: (4/20) | 4.22 s
    [Task  7/25]  Current/Best:   12.71/  20.11 GFLOPS | Progress: (8/20) | 7.10 s
    [Task  7/25]  Current/Best:    1.59/  20.11 GFLOPS | Progress: (12/20) | 10.90 s
    [Task  7/25]  Current/Best:   11.90/  20.11 GFLOPS | Progress: (16/20) | 13.77 s
    [Task  7/25]  Current/Best:   17.90/  20.11 GFLOPS | Progress: (20/20) | 15.74 s Done.
+
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   10.14/  12.96 GFLOPS | Progress: (4/20) | 6.84 s
    [Task  8/25]  Current/Best:   11.96/  12.96 GFLOPS | Progress: (8/20) | 11.40 s
    [Task  8/25]  Current/Best:    9.65/  12.96 GFLOPS | Progress: (12/20) | 17.14 s
    [Task  8/25]  Current/Best:    5.84/  12.96 GFLOPS | Progress: (16/20) | 20.96 s
    [Task  8/25]  Current/Best:    3.30/  13.12 GFLOPS | Progress: (20/20) | 25.00 s Done.
+
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   11.42/  16.90 GFLOPS | Progress: (4/20) | 9.73 s
    [Task  9/25]  Current/Best:    7.96/  17.34 GFLOPS | Progress: (8/20) | 11.53 s
    [Task  9/25]  Current/Best:   11.62/  17.34 GFLOPS | Progress: (12/20) | 13.92 s
    [Task  9/25]  Current/Best:   10.99/  17.34 GFLOPS | Progress: (16/20) | 25.07 s
    [Task  9/25]  Current/Best:   19.06/  19.06 GFLOPS | Progress: (20/20) | 26.83 s Done.
+
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   11.53/  14.59 GFLOPS | Progress: (4/20) | 3.69 s
    [Task 10/25]  Current/Best:   13.48/  16.44 GFLOPS | Progress: (8/20) | 5.67 s
    [Task 10/25]  Current/Best:    9.28/  17.96 GFLOPS | Progress: (12/20) | 7.36 s
    [Task 10/25]  Current/Best:    8.49/  17.96 GFLOPS | Progress: (16/20) | 9.98 s
    [Task 10/25]  Current/Best:    2.68/  17.96 GFLOPS | Progress: (20/20) | 12.41 s Done.
+
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   16.91/  22.45 GFLOPS | Progress: (4/20) | 4.27 s
    [Task 11/25]  Current/Best:    7.99/  22.45 GFLOPS | Progress: (8/20) | 7.21 s
    [Task 11/25]  Current/Best:    8.02/  22.45 GFLOPS | Progress: (12/20) | 10.42 s
    [Task 11/25]  Current/Best:    8.73/  22.45 GFLOPS | Progress: (16/20) | 13.71 s
    [Task 11/25]  Current/Best:    9.83/  22.45 GFLOPS | Progress: (20/20) | 16.19 s Done.
+
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    9.39/  17.57 GFLOPS | Progress: (4/20) | 3.86 s
    [Task 12/25]  Current/Best:   14.74/  17.57 GFLOPS | Progress: (8/20) | 6.53 s
    [Task 12/25]  Current/Best:   13.37/  17.57 GFLOPS | Progress: (12/20) | 8.92 s
    [Task 12/25]  Current/Best:   13.13/  17.57 GFLOPS | Progress: (16/20) | 11.27 s
    [Task 12/25]  Current/Best:   14.63/  17.57 GFLOPS | Progress: (20/20) | 13.96 s Done.
+
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    3.08/  18.87 GFLOPS | Progress: (4/20) | 5.86 s
    [Task 13/25]  Current/Best:    5.88/  18.87 GFLOPS | Progress: (8/20) | 9.97 s
    [Task 13/25]  Current/Best:    3.10/  21.01 GFLOPS | Progress: (12/20) | 13.14 s
    [Task 13/25]  Current/Best:   11.18/  21.01 GFLOPS | Progress: (16/20) | 18.29 s
    [Task 13/25]  Current/Best:    9.71/  21.76 GFLOPS | Progress: (20/20) | 22.81 s Done.
+
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   11.98/  17.42 GFLOPS | Progress: (4/20) | 8.22 s
    [Task 14/25]  Current/Best:   12.64/  17.42 GFLOPS | Progress: (8/20) | 12.11 s
    [Task 14/25]  Current/Best:   17.16/  18.27 GFLOPS | Progress: (12/20) | 15.19 s
    [Task 14/25]  Current/Best:   12.17/  18.27 GFLOPS | Progress: (16/20) | 17.52 s
    [Task 14/25]  Current/Best:    8.15/  18.27 GFLOPS | Progress: (20/20) | 24.74 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   17.61/  17.61 GFLOPS | Progress: (4/20) | 4.27 s
    [Task 15/25]  Current/Best:   12.36/  17.61 GFLOPS | Progress: (8/20) | 8.07 s Done.
+
    [Task 15/25]  Current/Best:   15.66/  20.18 GFLOPS | Progress: (12/20) | 9.77 s
    [Task 15/25]  Current/Best:   14.33/  20.18 GFLOPS | Progress: (16/20) | 12.84 s
    [Task 15/25]  Current/Best:   16.61/  21.13 GFLOPS | Progress: (20/20) | 14.31 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   13.78/  18.91 GFLOPS | Progress: (4/20) | 3.68 s
    [Task 16/25]  Current/Best:    6.47/  18.91 GFLOPS | Progress: (8/20) | 5.35 s
    [Task 16/25]  Current/Best:   16.09/  18.91 GFLOPS | Progress: (12/20) | 8.19 s
    [Task 16/25]  Current/Best:   11.02/  18.91 GFLOPS | Progress: (16/20) | 10.53 s
    [Task 16/25]  Current/Best:   13.33/  22.21 GFLOPS | Progress: (20/20) | 12.46 s Done.
+
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:    5.71/  10.67 GFLOPS | Progress: (4/20) | 5.96 s
    [Task 17/25]  Current/Best:    6.60/  13.88 GFLOPS | Progress: (8/20) | 9.16 s
    [Task 17/25]  Current/Best:   18.16/  18.88 GFLOPS | Progress: (12/20) | 11.55 s
    [Task 17/25]  Current/Best:   18.15/  18.88 GFLOPS | Progress: (16/20) | 15.48 s
    [Task 17/25]  Current/Best:   14.83/  18.88 GFLOPS | Progress: (20/20) | 19.64 s Done.
+
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:    7.19/  20.35 GFLOPS | Progress: (4/20) | 5.08 s
    [Task 18/25]  Current/Best:   10.30/  20.35 GFLOPS | Progress: (8/20) | 8.71 s
    [Task 18/25]  Current/Best:   14.46/  22.21 GFLOPS | Progress: (12/20) | 11.22 s
    [Task 18/25]  Current/Best:   18.34/  22.21 GFLOPS | Progress: (16/20) | 13.13 s
    [Task 18/25]  Current/Best:   17.07/  22.21 GFLOPS | Progress: (20/20) | 15.63 s Done.
+
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:   12.42/  15.62 GFLOPS | Progress: (4/20) | 5.52 s
    [Task 19/25]  Current/Best:   19.67/  19.67 GFLOPS | Progress: (8/20) | 9.30 s
    [Task 19/25]  Current/Best:   21.77/  21.77 GFLOPS | Progress: (12/20) | 12.39 s
    [Task 19/25]  Current/Best:    9.34/  21.77 GFLOPS | Progress: (16/20) | 17.15 s
    [Task 19/25]  Current/Best:    8.70/  21.77 GFLOPS | Progress: (20/20) | 19.87 s Done.
+
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    9.54/  16.81 GFLOPS | Progress: (4/20) | 4.51 s
    [Task 20/25]  Current/Best:    6.54/  16.81 GFLOPS | Progress: (8/20) | 11.14 s
    [Task 20/25]  Current/Best:    7.28/  17.40 GFLOPS | Progress: (12/20) | 13.28 s
    [Task 20/25]  Current/Best:   14.17/  17.40 GFLOPS | Progress: (16/20) | 14.47 s
    [Task 20/25]  Current/Best:    9.73/  17.40 GFLOPS | Progress: (20/20) | 18.35 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    3.05/  15.16 GFLOPS | Progress: (4/20) | 4.16 s
    [Task 21/25]  Current/Best:   14.15/  15.16 GFLOPS | Progress: (8/20) | 6.22 s Done.
+
    [Task 21/25]  Current/Best:   17.03/  17.03 GFLOPS | Progress: (12/20) | 9.22 s
    [Task 21/25]  Current/Best:   20.84/  20.84 GFLOPS | Progress: (16/20) | 11.36 s
    [Task 21/25]  Current/Best:   20.64/  20.84 GFLOPS | Progress: (20/20) | 14.02 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    6.37/  18.22 GFLOPS | Progress: (4/20) | 3.52 s
    [Task 22/25]  Current/Best:   13.87/  18.22 GFLOPS | Progress: (8/20) | 5.76 s
    [Task 22/25]  Current/Best:    8.05/  18.22 GFLOPS | Progress: (12/20) | 7.41 s
    [Task 22/25]  Current/Best:   19.21/  19.21 GFLOPS | Progress: (16/20) | 10.65 s
    [Task 22/25]  Current/Best:   11.40/  19.21 GFLOPS | Progress: (20/20) | 13.00 s Done.
+
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   22.38/  22.38 GFLOPS | Progress: (4/20) | 4.82 s
    [Task 23/25]  Current/Best:   10.30/  22.38 GFLOPS | Progress: (8/20) | 9.79 s
    [Task 23/25]  Current/Best:    9.78/  22.38 GFLOPS | Progress: (12/20) | 13.82 s
    [Task 23/25]  Current/Best:    9.62/  22.38 GFLOPS | Progress: (16/20) | 16.93 s
    [Task 23/25]  Current/Best:   11.27/  22.38 GFLOPS | Progress: (20/20) | 19.60 s Done.
+
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    6.30/   9.98 GFLOPS | Progress: (4/20) | 3.21 s
    [Task 24/25]  Current/Best:    7.37/   9.98 GFLOPS | Progress: (8/20) | 14.16 s
    [Task 24/25]  Current/Best:    3.38/   9.98 GFLOPS | Progress: (12/20) | 25.81 s
    [Task 24/25]  Current/Best:    6.84/   9.98 GFLOPS | Progress: (16/20) | 37.65 s
    [Task 24/25]  Current/Best:    6.82/   9.98 GFLOPS | Progress: (20/20) | 49.49 s
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
+
    [Task 25/25]  Current/Best:    1.55/   9.09 GFLOPS | Progress: (4/20) | 13.63 s
    [Task 25/25]  Current/Best:    1.52/   9.09 GFLOPS | Progress: (8/20) | 25.43 s
    [Task 25/25]  Current/Best:    8.87/   9.09 GFLOPS | Progress: (12/20) | 37.24 s
    [Task 25/25]  Current/Best:    5.63/   9.09 GFLOPS | Progress: (16/20) | 48.20 s
    [Task 25/25]  Current/Best:    1.51/   9.09 GFLOPS | Progress: (20/20) | 57.72 s
 
 
 
@@ -677,8 +676,8 @@ Verify that the optimized model runs and produces the same results:
 
  .. code-block:: none
 
-    class='n02123045 tabby, tabby cat' with probability=0.621104
-    class='n02123159 tiger cat' with probability=0.356378
+    class='n02123045 tabby, tabby cat' with probability=0.621105
+    class='n02123159 tiger cat' with probability=0.356377
     class='n02124075 Egyptian cat' with probability=0.019712
     class='n02129604 tiger, Panthera tigris' with probability=0.001215
     class='n04040759 radiator' with probability=0.000262
@@ -735,8 +734,8 @@ improvement in comparing the optimized model to the unoptimized model.
 
  .. code-block:: none
 
-    optimized: {'mean': 405.47370385999784, 'median': 404.48424835000196, 'std': 3.194131063843192}
-    unoptimized: {'mean': 509.8624551199999, 'median': 509.88398250000273, 'std': 1.470863428935131}
+    optimized: {'mean': 415.34261833999835, 'median': 415.049274699993, 'std': 2.3663135859396354}
+    unoptimized: {'mean': 511.40193933999853, 'median': 511.7527159499957, 'std': 1.7348229582885892}
 
 
 
@@ -759,7 +758,7 @@ profiling/benchmarking.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 11 minutes  18.256 seconds)
+   **Total running time of the script:** ( 11 minutes  57.606 seconds)
 
 
 .. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index 78ad906f5f..6e84eed084 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -274,7 +274,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.239e-07 secs/op
+    1.279e-07 secs/op
 
 
 
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index 4d0795c05d..04d9f93a2c 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -264,7 +264,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, placeholder(a, 0x21cb4f30)), stage(b, placeholder(b, 0xac4fe90)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min [...]
+    [stage(a, placeholder(a, 0x6f86c90)), stage(b, placeholder(b, 0x1aea8f70)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min [...]
 
 
 
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index c3139470c7..f282283c89 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,32 +5,32 @@
 
 Computation times
 =================
-**14:50.640** total execution time for **tutorial** files:
+**15:24.492** total execution time for **tutorial** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 11:18.256 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 11:57.606 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 01:37.247 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 01:17.955 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 00:59.557 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 00:58.664 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:33.236 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:36.162 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:20.168 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:31.511 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:01.193 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:01.574 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.819 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.822 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.154 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.189 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.006 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.007 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_uma.py` (``uma.py``)                                             | 00:00.001 | 0.0 MB |
-+------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_install.py` (``install.py``)                                     | 00:00.001 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_uma.py` (``uma.py``)                                             | 00:00.002 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)   | 00:00.001 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)                             | 00:00.001 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_tutorial_install.py` (``install.py``)                                     | 00:00.001 | 0.0 MB |
++------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index 0f5270e6d4..60c05a85b3 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -298,7 +298,7 @@ helper function to run a profile of the TVM generated code.
 
  .. code-block:: none
 
-    Numpy running time: 0.000007
+    Numpy running time: 0.000008
     naive: 0.000007
 
 
@@ -397,7 +397,7 @@ compile and run this new schedule with the parallel operation applied:
 
  .. code-block:: none
 
-    parallel: 0.000006
+    parallel: 0.000007
 
 
 
@@ -503,10 +503,10 @@ We can now compare the different schedules
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                   numpy    7.412830000248505e-06                    1.0
-                   naive    6.6970000000000004e-06    0.9034336413725248
-                parallel              6.0721e-06       0.819133853035405
-                  vector             2.45803e-05      3.3159130857143597
+                   numpy    7.751420000658981e-06                    1.0
+                   naive    7.026000000000001e-06     0.9064145665442837
+                parallel    6.9727999999999995e-06    0.8995513079419271
+                  vector             2.45814e-05        3.17121250014968
 
 
 
@@ -927,7 +927,7 @@ matrix multiplication.
 
  .. code-block:: none
 
-    Numpy running time: 0.017714
+    Numpy running time: 0.018905
 
 
 
@@ -985,7 +985,7 @@ optimizations.
 
  .. code-block:: none
 
-    none: 3.332128
+    none: 3.235728
 
 
 
@@ -1087,7 +1087,7 @@ schedule.
 
  .. code-block:: none
 
-    blocking: 0.289117
+    blocking: 0.292657
 
 
 
@@ -1182,7 +1182,7 @@ already cache friendly from our previous optimizations.
 
  .. code-block:: none
 
-    vectorization: 0.325714
+    vectorization: 0.334434
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], []),
@@ -1255,7 +1255,7 @@ more cache friendly.
 
  .. code-block:: none
 
-    loop permutation: 0.115943
+    loop permutation: 0.118269
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], []),
@@ -1353,7 +1353,7 @@ optimized schedule.
 
  .. code-block:: none
 
-    array packing: 0.108847
+    array packing: 0.109269
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], []),
@@ -1445,7 +1445,7 @@ to `C` when all the block results are ready.
 
  .. code-block:: none
 
-    block caching: 0.110194
+    block caching: 0.110208
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], []),
@@ -1530,7 +1530,7 @@ of thread-level parallelization.
 
  .. code-block:: none
 
-    parallelization: 0.145552
+    parallelization: 0.145708
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1024, 1024], []),
@@ -1610,13 +1610,13 @@ working, we can compare the results.
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                    none      3.3321282782000003                     1.0
-                blocking            0.2891165662     0.08676633732605857
-           vectorization            0.3257144058     0.09774965985881832
-        loop permutation            0.1159430191     0.03479548487329902
-           array packing     0.10884659699999999     0.03266578832277081
-           block caching            0.1101939036     0.03307012647770158
-         parallelization            0.1455522765     0.04368147452553256
+                    none      3.2357280281999996                     1.0
+                blocking             0.292657104      0.0904455199724564
+           vectorization     0.33443425390000003     0.10335672559168768
+        loop permutation             0.118268963     0.03655095915641332
+           array packing     0.10926860489999998     0.03376940334530678
+           block caching            0.1102082691     0.03405980605894979
+         parallelization            0.1457075899     0.04503085198450858
 
 
 
diff --git a/docs/commit_hash b/docs/commit_hash
index 6547ddc1d0..f33fb38ce3 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-30abbe98321acf594d2cd0d6b9a7c570471d9264
+088bc118c7a0abd263b634dc88be59813652251c
diff --git a/docs/how_to/compile_models/from_darknet.html b/docs/how_to/compile_models/from_darknet.html
index 238619ba46..a41850b221 100644
--- a/docs/how_to/compile_models/from_darknet.html
+++ b/docs/how_to/compile_models/from_darknet.html
@@ -585,7 +585,7 @@ class:[&#39;truck 0.9266&#39;] left:471 top:83 right:689 bottom:169
 class:[&#39;bicycle 0.9984&#39;] left:111 top:113 right:577 bottom:447
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  6.880 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  10.786 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-darknet-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7716f96385bd5abb6e822041e285be54/from_darknet.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_darknet.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_keras.html b/docs/how_to/compile_models/from_keras.html
index 27b2d1fa50..a4dd750674 100644
--- a/docs/how_to/compile_models/from_keras.html
+++ b/docs/how_to/compile_models/from_keras.html
@@ -506,7 +506,7 @@ Tensorflow is also required since it’s used as the default backend of keras.</
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Relay top-1 id: 285, class name: Egyptian cat
 
 1/1 [==============================] - ETA: 0s
-1/1 [==============================] - 1s 933ms/step
+1/1 [==============================] - 1s 954ms/step
 Keras top-1 id: 285, class name: Egyptian cat
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index 5f81ccdbe5..aeda6e98f1 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -439,7 +439,7 @@
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;x&quot;</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">x</span><span class="o">.</span><span class="n">shape</span></a><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip5b2e833d-6156-4871-b906-654707010ba8 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip1e507af6-40d4-4cf9-96c2-f151e3077678 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
 x (1, 3, 224, 224)
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index f3e6e2745f..a2b3938a5f 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -449,11 +449,13 @@ Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdo
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip&quot; to /workspace/.oneflow/flowvision_cache/resnet18.zip
 
   0%|          | 0.00/41.5M [00:00&lt;?, ?B/s]
- 23%|##2       | 9.38M/41.5M [00:00&lt;00:00, 98.4MB/s]
- 45%|####5     | 18.8M/41.5M [00:00&lt;00:00, 87.4MB/s]
- 66%|######5   | 27.2M/41.5M [00:00&lt;00:00, 75.4MB/s]
- 83%|########3 | 34.5M/41.5M [00:00&lt;00:00, 75.5MB/s]
-100%|##########| 41.5M/41.5M [00:00&lt;00:00, 68.7MB/s]
+ 15%|#5        | 6.33M/41.5M [00:00&lt;00:00, 55.2MB/s]
+ 28%|##7       | 11.6M/41.5M [00:00&lt;00:00, 55.1MB/s]
+ 41%|####      | 16.9M/41.5M [00:00&lt;00:00, 52.5MB/s]
+ 58%|#####7    | 24.0M/41.5M [00:00&lt;00:00, 51.2MB/s]
+ 76%|#######6  | 31.7M/41.5M [00:00&lt;00:00, 58.7MB/s]
+ 91%|######### | 37.6M/41.5M [00:00&lt;00:00, 59.7MB/s]
+100%|##########| 41.5M/41.5M [00:00&lt;00:00, 55.1MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_pytorch.html b/docs/how_to/compile_models/from_pytorch.html
index 4f2f286450..4eea815d58 100644
--- a/docs/how_to/compile_models/from_pytorch.html
+++ b/docs/how_to/compile_models/from_pytorch.html
@@ -432,10 +432,12 @@ be unstable.</p>
 Downloading: &quot;https://download.pytorch.org/models/resnet18-f37072fd.pth&quot; to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
 
   0%|          | 0.00/44.7M [00:00&lt;?, ?B/s]
- 28%|##7       | 12.3M/44.7M [00:00&lt;00:00, 129MB/s]
- 55%|#####5    | 24.6M/44.7M [00:00&lt;00:00, 109MB/s]
- 79%|#######8  | 35.2M/44.7M [00:00&lt;00:00, 106MB/s]
-100%|##########| 44.7M/44.7M [00:00&lt;00:00, 106MB/s]
+ 18%|#7        | 7.99M/44.7M [00:00&lt;00:00, 76.3MB/s]
+ 40%|####      | 18.1M/44.7M [00:00&lt;00:00, 89.9MB/s]
+ 60%|#####9    | 26.6M/44.7M [00:00&lt;00:00, 88.5MB/s]
+ 80%|#######9  | 35.6M/44.7M [00:00&lt;00:00, 90.4MB/s]
+ 99%|#########8| 44.2M/44.7M [00:00&lt;00:00, 89.7MB/s]
+100%|##########| 44.7M/44.7M [00:00&lt;00:00, 89.4MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_tensorflow.html b/docs/how_to/compile_models/from_tensorflow.html
index df07e43fa3..ef3a7d62ab 100644
--- a/docs/how_to/compile_models/from_tensorflow.html
+++ b/docs/how_to/compile_models/from_tensorflow.html
@@ -649,7 +649,7 @@ banana (score = 0.00022)
 desk (score = 0.00019)
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  10.111 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  11.754 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-tensorflow-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7f1d3d1b878694c201c614c807cdebc8/from_tensorflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_tensorflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/sg_execution_times.html b/docs/how_to/compile_models/sg_execution_times.html
index 72e6bdedf1..ff94f6d29f 100644
--- a/docs/how_to/compile_models/sg_execution_times.html
+++ b/docs/how_to/compile_models/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-compile-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:33.870</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
+<p><strong>05:50.437</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 81%" />
@@ -349,43 +349,43 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></td>
-<td><p>01:10.111</p></td>
+<td><p>01:11.754</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></td>
-<td><p>01:06.880</p></td>
+<td><p>01:10.786</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></td>
-<td><p>00:45.810</p></td>
+<td><p>00:49.910</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></td>
-<td><p>00:31.605</p></td>
+<td><p>00:34.675</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></td>
-<td><p>00:28.422</p></td>
+<td><p>00:30.955</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></td>
-<td><p>00:25.666</p></td>
+<td><p>00:26.373</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></td>
-<td><p>00:24.498</p></td>
+<td><p>00:24.948</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></td>
-<td><p>00:22.170</p></td>
+<td><p>00:22.015</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></td>
-<td><p>00:16.280</p></td>
+<td><p>00:16.634</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></td>
-<td><p>00:02.426</p></td>
+<td><p>00:02.385</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/deploy_models/deploy_model_on_adreno.html b/docs/how_to/deploy_models/deploy_model_on_adreno.html
index 6a5e6381df..8e5320200c 100644
--- a/docs/how_to/deploy_models/deploy_model_on_adreno.html
+++ b/docs/how_to/deploy_models/deploy_model_on_adreno.html
@@ -920,10 +920,9 @@ Top5 predictions:
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
- 3340.8585    3339.3044    3354.8016    3335.9312      5.3039
+ 2846.1168    2831.2393    2933.0375    2802.9358     45.3458
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  0.638 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-model-on-adreno-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/2387d8448da213eb625e6b3d916327d4/deploy_model_on_adreno.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_model_on_adreno.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_model_on_android.html b/docs/how_to/deploy_models/deploy_model_on_android.html
index 8bf2f75e19..5f35d66aef 100644
--- a/docs/how_to/deploy_models/deploy_model_on_android.html
+++ b/docs/how_to/deploy_models/deploy_model_on_android.html
@@ -662,7 +662,7 @@ to the remote android device.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  15.8475      15.6642      16.5656      15.4942       0.3540
+  16.8016      16.6977      17.8308      16.6099       0.3473
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
index 9158dccf65..84786a535d 100644
--- a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
+++ b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
@@ -454,23 +454,25 @@ be unstable.</p>
 Downloading: &quot;https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth&quot; to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
 
   0%|          | 0.00/170M [00:00&lt;?, ?B/s]
-  7%|7         | 12.1M/170M [00:00&lt;00:01, 127MB/s]
- 14%|#4        | 24.2M/170M [00:00&lt;00:01, 109MB/s]
- 20%|##        | 34.8M/170M [00:00&lt;00:01, 106MB/s]
- 26%|##6       | 44.9M/170M [00:00&lt;00:01, 104MB/s]
- 32%|###2      | 54.9M/170M [00:00&lt;00:01, 102MB/s]
- 38%|###8      | 64.6M/170M [00:00&lt;00:01, 96.6MB/s]
- 45%|####4     | 75.7M/170M [00:00&lt;00:00, 102MB/s]
- 50%|#####     | 85.6M/170M [00:00&lt;00:00, 102MB/s]
- 56%|#####6    | 95.3M/170M [00:00&lt;00:00, 101MB/s]
- 62%|######1   | 105M/170M [00:01&lt;00:00, 95.3MB/s]
- 69%|######8   | 116M/170M [00:01&lt;00:00, 102MB/s]
- 74%|#######4  | 126M/170M [00:01&lt;00:00, 102MB/s]
- 80%|########  | 136M/170M [00:01&lt;00:00, 95.7MB/s]
- 87%|########6 | 148M/170M [00:01&lt;00:00, 103MB/s]
- 93%|#########2| 157M/170M [00:01&lt;00:00, 101MB/s]
- 98%|#########8| 167M/170M [00:01&lt;00:00, 101MB/s]
-100%|##########| 170M/170M [00:01&lt;00:00, 101MB/s]
+  5%|4         | 7.99M/170M [00:00&lt;00:03, 49.2MB/s]
+  9%|9         | 16.1M/170M [00:00&lt;00:02, 65.1MB/s]
+ 14%|#4        | 24.1M/170M [00:00&lt;00:02, 72.5MB/s]
+ 19%|#8        | 32.1M/170M [00:00&lt;00:01, 73.7MB/s]
+ 24%|##3       | 40.1M/170M [00:00&lt;00:01, 76.0MB/s]
+ 28%|##8       | 48.0M/170M [00:00&lt;00:01, 78.2MB/s]
+ 38%|###7      | 64.0M/170M [00:00&lt;00:01, 87.8MB/s]
+ 44%|####3     | 74.1M/170M [00:00&lt;00:01, 92.6MB/s]
+ 49%|####9     | 83.7M/170M [00:01&lt;00:00, 94.9MB/s]
+ 55%|#####4    | 92.8M/170M [00:01&lt;00:00, 93.1MB/s]
+ 60%|#####9    | 102M/170M [00:01&lt;00:00, 80.2MB/s]
+ 65%|######4   | 110M/170M [00:01&lt;00:00, 66.2MB/s]
+ 71%|#######   | 120M/170M [00:01&lt;00:00, 68.0MB/s]
+ 78%|#######7  | 132M/170M [00:01&lt;00:00, 81.7MB/s]
+ 83%|########2 | 141M/170M [00:01&lt;00:00, 79.7MB/s]
+ 88%|########7 | 149M/170M [00:02&lt;00:00, 73.9MB/s]
+ 92%|#########1| 156M/170M [00:02&lt;00:00, 73.3MB/s]
+ 98%|#########7| 166M/170M [00:02&lt;00:00, 82.0MB/s]
+100%|##########| 170M/170M [00:02&lt;00:00, 76.7MB/s]
 /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torch/nn/functional.py:3897: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
   for i in range(dim)
 /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/detection/anchor_utils.py:124: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the &#39;trunc&#39; function NOT &#39;floor&#39;). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode=&#39;trunc&#39;), or for actual floor division, use torch.div(a, b, rounding_mode=& [...]
@@ -568,7 +570,7 @@ torchvision rcnn models.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get 9 valid boxes
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  7.575 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  15.325 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-object-detection-pytorch-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7795da4b258c8feff986668b95ef57ad/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized.html b/docs/how_to/deploy_models/deploy_prequantized.html
index c526b5660b..41da5fd831 100644
--- a/docs/how_to/deploy_models/deploy_prequantized.html
+++ b/docs/how_to/deploy_models/deploy_prequantized.html
@@ -498,7 +498,8 @@ training. Other models require a full post training calibration.</p>
 Downloading: &quot;https://download.pytorch.org/models/mobilenet_v2-b0353104.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
 
   0%|          | 0.00/13.6M [00:00&lt;?, ?B/s]
-100%|##########| 13.6M/13.6M [00:00&lt;00:00, 149MB/s]
+ 59%|#####8    | 7.99M/13.6M [00:00&lt;00:00, 81.8MB/s]
+100%|##########| 13.6M/13.6M [00:00&lt;00:00, 102MB/s]
 </pre></div>
 </div>
 </div>
@@ -589,7 +590,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  90.0417      89.9371      93.6422      89.7781       0.4685
+  94.9061      94.5728      99.8493      89.9776       3.2943
 </pre></div>
 </div>
 <div class="admonition note">
@@ -628,7 +629,7 @@ This includes support for the VNNI 8 bit dot product instruction (CascadeLake or
 <div class="section" id="deploy-a-quantized-tflite-model">
 <h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline">¶</a></h2>
 <p>TODO</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  4.745 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  6.252 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized_tflite.html b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
index 595f2d5c85..4313f9d625 100644
--- a/docs/how_to/deploy_models/deploy_prequantized_tflite.html
+++ b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
@@ -583,7 +583,7 @@ TFLite Top-5 labels: [387 102 386 341 349]
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  118.5336     118.5244     124.8535     116.4806      1.0182
+  120.3976     120.3663     123.8272     119.6521      0.4755
 </pre></div>
 </div>
 <div class="admonition note">
@@ -611,7 +611,7 @@ network for ARM CPU</span></a>.</p></li>
 </ul>
 </div></blockquote>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  34.865 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  24.856 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-tflite-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/56691c7a27d45da61d112276334640d3/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_quantized.html b/docs/how_to/deploy_models/deploy_quantized.html
index 13ee14a37d..37afdf2e11 100644
--- a/docs/how_to/deploy_models/deploy_quantized.html
+++ b/docs/how_to/deploy_models/deploy_quantized.html
@@ -521,7 +521,7 @@ for calibration. But the accuracy might be impacted.</p>
   DeprecationWarning,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  28.151 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  27.633 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
index cb96aceb31..a6c30f583d 100644
--- a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
+++ b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
@@ -463,24 +463,24 @@ to your device.</p>
 Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
 
   0%|          | 0/132723 [00:00&lt;?, ?KB/s]
-  4%|3         | 4845/132723 [00:00&lt;00:02, 48441.85KB/s]
-  9%|9         | 12273/132723 [00:00&lt;00:01, 63638.23KB/s]
- 15%|#5        | 19975/132723 [00:00&lt;00:01, 69746.15KB/s]
- 21%|##        | 27663/132723 [00:00&lt;00:01, 72561.03KB/s]
- 27%|##6       | 35358/132723 [00:00&lt;00:01, 74141.90KB/s]
- 32%|###2      | 43043/132723 [00:00&lt;00:01, 75061.67KB/s]
- 38%|###8      | 50775/132723 [00:00&lt;00:01, 75798.55KB/s]
- 44%|####4     | 58517/132723 [00:00&lt;00:00, 76310.80KB/s]
- 50%|####9     | 66263/132723 [00:00&lt;00:00, 76667.55KB/s]
- 56%|#####5    | 74045/132723 [00:01&lt;00:00, 77021.35KB/s]
- 62%|######1   | 81763/132723 [00:01&lt;00:00, 77067.06KB/s]
- 67%|######7   | 89511/132723 [00:01&lt;00:00, 77188.55KB/s]
- 73%|#######3  | 97258/132723 [00:01&lt;00:00, 77271.70KB/s]
- 79%|#######9  | 105030/132723 [00:01&lt;00:00, 77402.61KB/s]
- 85%|########4 | 112797/132723 [00:01&lt;00:00, 77478.21KB/s]
- 91%|######### | 120577/132723 [00:01&lt;00:00, 77572.35KB/s]
- 97%|#########6| 128410/132723 [00:01&lt;00:00, 77795.38KB/s]
-100%|##########| 132723/132723 [00:01&lt;00:00, 75605.07KB/s]
+  2%|2         | 2717/132723 [00:00&lt;00:04, 27153.97KB/s]
+  8%|7         | 10158/132723 [00:00&lt;00:02, 54943.45KB/s]
+ 14%|#3        | 18244/132723 [00:00&lt;00:01, 66772.88KB/s]
+ 20%|#9        | 26332/132723 [00:00&lt;00:01, 72337.72KB/s]
+ 26%|##5       | 34401/132723 [00:00&lt;00:01, 75347.70KB/s]
+ 32%|###2      | 42565/132723 [00:00&lt;00:01, 77485.04KB/s]
+ 38%|###8      | 50747/132723 [00:00&lt;00:01, 78896.02KB/s]
+ 44%|####4     | 58915/132723 [00:00&lt;00:00, 79781.15KB/s]
+ 51%|#####     | 67058/132723 [00:00&lt;00:00, 80295.51KB/s]
+ 57%|#####6    | 75218/132723 [00:01&lt;00:00, 80697.10KB/s]
+ 63%|######2   | 83423/132723 [00:01&lt;00:00, 81109.68KB/s]
+ 69%|######9   | 91639/132723 [00:01&lt;00:00, 81426.02KB/s]
+ 75%|#######5  | 99832/132723 [00:01&lt;00:00, 81577.76KB/s]
+ 81%|########1 | 108029/132723 [00:01&lt;00:00, 81693.08KB/s]
+ 88%|########7 | 116216/132723 [00:01&lt;00:00, 81737.38KB/s]
+ 94%|#########3| 124390/132723 [00:01&lt;00:00, 81411.82KB/s]
+100%|#########9| 132532/132723 [00:01&lt;00:00, 80315.84KB/s]
+100%|##########| 132723/132723 [00:01&lt;00:00, 77563.74KB/s]
 </pre></div>
 </div>
 <p>Create TVM runtime and do inference
@@ -519,7 +519,7 @@ Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from h
 <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  3.701 seconds)</p>
+<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  20.215 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/sg_execution_times.html b/docs/how_to/deploy_models/sg_execution_times.html
index ba32f131a3..87cf605e36 100644
--- a/docs/how_to/deploy_models/sg_execution_times.html
+++ b/docs/how_to/deploy_models/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-deploy-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>13:44.485</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
+<p><strong>14:01.296</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 86%" />
@@ -348,44 +348,44 @@
 <col style="width: 6%" />
 </colgroup>
 <tbody>
-<tr class="row-odd"><td><p><a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></td>
-<td><p>03:07.575</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></td>
+<td><p>03:20.215</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></td>
-<td><p>03:03.701</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></td>
+<td><p>03:15.325</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></td>
-<td><p>02:34.865</p></td>
+<td><p>02:24.856</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></td>
-<td><p>01:28.151</p></td>
+<td><p>01:27.633</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></td>
-<td><p>01:04.745</p></td>
+<td><p>01:06.252</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_adreno.html#sphx-glr-how-to-deploy-models-deploy-model-on-adreno-py"><span class="std std-ref">Deploy the Pretrained Model on Adreno</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_adreno.py</span></code>)</p></td>
-<td><p>01:00.638</p></td>
+<td><p>00:56.856</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></td>
-<td><p>00:34.717</p></td>
+<td><p>00:38.381</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_nano.html#sphx-glr-how-to-deploy-models-deploy-model-on-nano-py"><span class="std std-ref">Deploy the Pretrained Model on Jetson Nano</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_nano.py</span></code>)</p></td>
-<td><p>00:25.260</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></td>
+<td><p>00:26.751</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></td>
-<td><p>00:24.827</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_nano.html#sphx-glr-how-to-deploy-models-deploy-model-on-nano-py"><span class="std std-ref">Deploy the Pretrained Model on Jetson Nano</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_nano.py</span></code>)</p></td>
+<td><p>00:25.021</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></td>
-<td><p>00:00.007</p></td>
+<td><p>00:00.006</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/extend_tvm/bring_your_own_datatypes.html b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
index 44a07ebbfd..57306c54a3 100644
--- a/docs/how_to/extend_tvm/bring_your_own_datatypes.html
+++ b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
@@ -622,7 +622,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 <span class="n">module</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <span class="n">get_mobilenet</span><span class="p">()</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipa0a8e594-1da9-4714-9833-f7e4fd889ea3 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip9dcdd43b-c5e0-42fb-9396-dfe32c896d74 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 </pre></div>
 </div>
 <p>It’s easy to execute MobileNet with native TVM:</p>
diff --git a/docs/how_to/extend_tvm/sg_execution_times.html b/docs/how_to/extend_tvm/sg_execution_times.html
index 41a8f9e606..6e8973cde3 100644
--- a/docs/how_to/extend_tvm/sg_execution_times.html
+++ b/docs/how_to/extend_tvm/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-extend-tvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:46.260</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
+<p><strong>00:47.640</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -349,19 +349,19 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></td>
-<td><p>00:42.925</p></td>
+<td><p>00:43.843</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></td>
-<td><p>00:02.327</p></td>
+<td><p>00:02.677</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></td>
-<td><p>00:01.000</p></td>
+<td><p>00:01.111</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></td>
-<td><p>00:00.007</p></td>
+<td><p>00:00.008</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/extend_tvm/use_pass_instrument.html b/docs/how_to/extend_tvm/use_pass_instrument.html
index 91cdbec8f0..710c192c5f 100644
--- a/docs/how_to/extend_tvm/use_pass_instrument.html
+++ b/docs/how_to/extend_tvm/use_pass_instrument.html
@@ -526,10 +526,10 @@ profile the execution time of each passes.</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 7092us [7092us] (46.21%; 46.21%)
-FoldScaleAxis: 8256us [7us] (53.79%; 53.79%)
-        FoldConstant: 8250us [1701us] (53.75%; 99.92%)
-                InferType: 6549us [6549us] (42.67%; 79.39%)
+InferType: 7870us [7870us] (46.52%; 46.52%)
+FoldScaleAxis: 9047us [7us] (53.48%; 53.48%)
+        FoldConstant: 9040us [1849us] (53.44%; 99.92%)
+                InferType: 7191us [7191us] (42.51%; 79.54%)
 </pre></div>
 </div>
 </div>
@@ -551,10 +551,10 @@ Refer to following sections and <a class="reference internal" href="../../refere
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6621us [6621us] (45.31%; 45.31%)
-FoldScaleAxis: 7992us [5us] (54.69%; 54.69%)
-        FoldConstant: 7987us [1682us] (54.66%; 99.94%)
-                InferType: 6305us [6305us] (43.15%; 78.94%)
+InferType: 7257us [7257us] (44.60%; 44.60%)
+FoldScaleAxis: 9013us [5us] (55.40%; 55.40%)
+        FoldConstant: 9008us [1905us] (55.37%; 99.95%)
+                InferType: 7103us [7103us] (43.66%; 78.85%)
 </pre></div>
 </div>
 <p>Register empty list to clear existing instruments.</p>
diff --git a/docs/how_to/optimize_operators/opt_conv_cuda.html b/docs/how_to/optimize_operators/opt_conv_cuda.html
index d0b526a903..1d48fb6dcc 100644
--- a/docs/how_to/optimize_operators/opt_conv_cuda.html
+++ b/docs/how_to/optimize_operators/opt_conv_cuda.html
@@ -578,7 +578,7 @@ latency of convolution.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Convolution: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">*</span> <span cl [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 35.913761 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 54.166305 ms
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-optimize-operators-opt-conv-cuda-py">
diff --git a/docs/how_to/optimize_operators/opt_conv_tensorcore.html b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
index d619553fd9..5b581ab051 100644
--- a/docs/how_to/optimize_operators/opt_conv_tensorcore.html
+++ b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
@@ -915,7 +915,7 @@ be able to run on our build server</p>
     <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;conv2d with tensor core: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">* [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 13.342461 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 13.359718 ms
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/optimize_operators/opt_gemm.html b/docs/how_to/optimize_operators/opt_gemm.html
index 0607a3d909..14966ff035 100644
--- a/docs/how_to/optimize_operators/opt_gemm.html
+++ b/docs/how_to/optimize_operators/opt_gemm.html
@@ -475,8 +475,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Baseline: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.018220
-Baseline: 3.325094
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.020501
+Baseline: 3.317244
 </pre></div>
 </div>
 <p>In TVM, we can always inspect lower level IR to debug or optimize our schedule.
@@ -535,7 +535,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt1: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.296584
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.328635
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -601,7 +601,7 @@ vastly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt2: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.329013
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.346811
 </pre></div>
 </div>
 <p>Here is the generated IR after vectorization.</p>
@@ -661,7 +661,7 @@ the access pattern for A matrix is more cache friendly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt3: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.113831
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.126123
 </pre></div>
 </div>
 <p>Here is the generated IR after loop permutation.</p>
@@ -743,7 +743,7 @@ flattening.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt4: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.109471
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.119191
 </pre></div>
 </div>
 <p>Here is the generated IR after array packing.</p>
@@ -828,7 +828,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt5: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.110779
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.121588
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -917,7 +917,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt6: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">opt6_time</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.146559
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.156732
 </pre></div>
 </div>
 <p>Here is the generated IR after parallelization.</p>
diff --git a/docs/how_to/optimize_operators/sg_execution_times.html b/docs/how_to/optimize_operators/sg_execution_times.html
index d511b1998d..931d08d29b 100644
--- a/docs/how_to/optimize_operators/sg_execution_times.html
+++ b/docs/how_to/optimize_operators/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-optimize-operators-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:34.501</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
+<p><strong>00:36.448</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -349,15 +349,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></td>
-<td><p>00:31.855</p></td>
+<td><p>00:33.768</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></td>
-<td><p>00:01.541</p></td>
+<td><p>00:01.579</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></td>
-<td><p>00:01.105</p></td>
+<td><p>00:01.102</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
index d0e16c23af..82164fbd21 100644
--- a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
+++ b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autoscheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>09:02.544</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
+<p><strong>09:17.784</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 85%" />
@@ -349,27 +349,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></td>
-<td><p>05:41.286</p></td>
+<td><p>05:46.184</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></td>
-<td><p>01:30.702</p></td>
+<td><p>01:33.595</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></td>
-<td><p>01:00.923</p></td>
+<td><p>01:03.655</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></td>
-<td><p>00:26.801</p></td>
+<td><p>00:29.103</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></td>
-<td><p>00:11.840</p></td>
+<td><p>00:13.153</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></td>
-<td><p>00:10.991</p></td>
+<td><p>00:12.093</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
index c8c16d39b6..79229b5e98 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
@@ -504,163 +504,484 @@ cooperative fetching, unrolling and operator fusion.</p>
              bias: Buffer(bias_2: Pointer(float32), float32, [1, 512, 1, 1], []),
              compute: Buffer(compute_2: Pointer(float32), float32, [1, 512, 7, 7], [])}
   buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute} {
-  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-  allocate(conv2d_nchw: Pointer(local float32), float32, [4]), storage_scope = local;
-  allocate(pad_temp.shared: Pointer(shared float32), float32, [4032]), storage_scope = shared;
-  allocate(kernel.shared: Pointer(shared float32), float32, [1536]), storage_scope = shared;
-  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98 {
-    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [1], [], scope=&quot;local&quot;, align=4)[0] = 0f32
+  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 28;
+  allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
+  allocate(pad_temp.shared: Pointer(shared float32), float32, [72]), storage_scope = shared;
+  allocate(kernel.shared: Pointer(shared float32), float32, [3072]), storage_scope = shared;
+  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64 {
+    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope=&quot;local&quot;, align=32)[0] = 0f32
     conv2d_nchw_1[1] = 0f32
     conv2d_nchw_1[2] = 0f32
     conv2d_nchw_1[3] = 0f32
-    for (rc.outer.outer: int32, 0, 8) {
+    conv2d_nchw_1[4] = 0f32
+    conv2d_nchw_1[5] = 0f32
+    conv2d_nchw_1[6] = 0f32
+    conv2d_nchw_1[7] = 0f32
+    conv2d_nchw_1[8] = 0f32
+    conv2d_nchw_1[9] = 0f32
+    conv2d_nchw_1[10] = 0f32
+    conv2d_nchw_1[11] = 0f32
+    conv2d_nchw_1[12] = 0f32
+    conv2d_nchw_1[13] = 0f32
+    for (rc.outer.outer: int32, 0, 64) {
       for (ry.outer.outer: int32, 0, 3) {
-        let cse_var_4: int32 = (rc.outer.outer*3136)
-        let cse_var_3: int32 = (ry.outer.outer*7)
-        let cse_var_2: int32 = (rc.outer.outer*576)
+        let cse_var_2: int32 = (rc.outer.outer*72)
         let cse_var_1: int32 = (ry.outer.outer*3)
          {
-          attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1: Buffer(pad_temp.shared, float32, [4032], [], scope=&quot;shared&quot;)[threadIdx.x_1] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(threadIdx.x_1, 9))) &amp;&amp; (floormod(threadIdx.x_1, 9) &lt; 8)), data_3: Buffer(data_2, float32, [25088], [])[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 98)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 8), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 8), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 98), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 196)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 7), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 7), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 196), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 294)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 6), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 6), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 294), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 5), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 5), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 392), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 490)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 4), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 4), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 490), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 588)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 3), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 3), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 588), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 686)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 2), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 2), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 686), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 784)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 1), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 1), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 784), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 882)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(threadIdx.x_1, 9))) &amp;&amp; (floormod(threadIdx.x_1, 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) + 678)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 980)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 8), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 8), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 980), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 1078)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 7), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 7), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1078), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 1176)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 6), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 6), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1176), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 1274)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 5), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 5), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1274), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 1372)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 4), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 4), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1372), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 1470)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 3), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 3), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1470), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 1568)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 2), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 2), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1568), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 1666)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 1), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 1), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1666), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 1764)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(threadIdx.x_1, 9))) &amp;&amp; (floormod(threadIdx.x_1, 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) + 1364)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 1862)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 8), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 8), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1862), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 1960)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 7), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 7), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 1960), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 2058)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 6), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 6), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2058), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 2156)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 5), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 5), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2156), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 2254)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 4), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 4), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2254), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 2352)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 3), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 3), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2352), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 2450)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 2), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 2), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2450), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 2548)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 1), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 1), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2548), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 2646)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(threadIdx.x_1, 9))) &amp;&amp; (floormod(threadIdx.x_1, 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) + 2050)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 2744)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 8), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 8), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2744), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 2842)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 7), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 7), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2842), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 2940)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 6), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 6), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 2940), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 3038)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 5), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 5), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3038), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 3136)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 4), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 4), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3136), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 3234)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 3), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 3), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3234), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 3332)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 2), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 2), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3332), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 3430)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 1), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 1), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3430), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 3528)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(threadIdx.x_1, 9))) &amp;&amp; (floormod(threadIdx.x_1, 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) + 2736)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 3626)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 8), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 8), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3626), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 3724)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 7), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 7), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3724), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 3822)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 6), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 6), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3822), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          pad_temp.shared_1[(threadIdx.x_1 + 3920)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 5), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 5), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 3920), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          if @tir.likely((threadIdx.x_1 &lt; 14), dtype=bool) {
-            pad_temp.shared_1[(threadIdx.x_1 + 4018)] = @tir.if_then_else(((((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) &lt; 8) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 4), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 4), 9) &lt; 8)), data_3[((((cse_var_4 + (floordiv((threadIdx.x_1 + 4018), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
-          }
-          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1: Buffer(kernel.shared, float32, [1536], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel_3: Buffer(kernel_2, float32, [2359296], [])[(((((blockIdx.x*36864) + cse_var_2) + (floordiv(threadIdx.x_2, 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 98)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 98), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 98), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 196)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 196), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 4), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 294)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 294), 192)*4608)) + cse_var_2) + (floormod((floordiv(threadIdx.x_2, 3) + 34), 64)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 392)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 392), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 490)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 490), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 106), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 588)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 588), 192)*4608)) + cse_var_2) + ((floordiv(threadIdx.x_2, 3) + 4)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 686)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 686), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 110), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 784)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 784), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 882)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 882), 192)*4608)) + cse_var_2) + (floormod((floordiv(threadIdx.x_2, 3) + 38), 64)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 980)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 980), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 20), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 1078)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1078), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 118), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 1176)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1176), 192)*4608)) + cse_var_2) + ((floordiv(threadIdx.x_2, 3) + 8)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 1274)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1274), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 122), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          kernel.shared_1[(threadIdx.x_2 + 1372)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1372), 192)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 28), 192), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 98;
-          if @tir.likely((threadIdx.x_2 &lt; 66), dtype=bool) {
-            kernel.shared_1[(threadIdx.x_2 + 1470)] = kernel_3[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 1470), 192)*4608)) + cse_var_2) + ((floordiv(threadIdx.x_2, 3) + 42)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-          }
-          for (rc.inner: int32, 0, 64) {
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7))]*kernel.shared_1[((floordiv(threadIdx.x, 49)*192) + (rc.inner*3))]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7))]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 384)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7))]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 768)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7))]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 1152)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 1)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 385)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 769)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 1153)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 2)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 386)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 770)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[((((rc.inner*63) + (floordiv(floormod(threadIdx.x, 49), 7)*9)) + floormod(threadIdx.x, 7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*192) + (rc.inner*3)) + 1154)]))
+          attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64 {
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [72], [], scope=&quot;shared&quot;)[(threadIdx.x_1*4)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 9))) &amp;&amp; (floormod((threadIdx.x_1*4), 9) &lt; 8)), data_3: Buffer(data_2, float32, [25088], [])[((((((rc.outer.outer*392) + (floordiv((threadIdx.x_1*4), 9)*49)) + (ry.outer.out [...]
+            }
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 1), 9) &lt; 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 1), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], [...]
+            }
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 2), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 2), 9) &lt; 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 2), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], [...]
+            }
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 3), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 3), 9) &lt; 8)), data_3[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 3), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], [...]
+            }
           }
+          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1: Buffer(kernel.shared, float32, [3072], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel_3: Buffer(kernel_2, float32, [2359296], [])[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 64)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 64), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 128)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 128), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 192)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 36864)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 256)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 256), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 320)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 320), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 384)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 73728)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 448)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 448), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 512)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 512), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 576)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 110592)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 640)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 640), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 704)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 704), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 768)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 147456)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 832)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 832), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 896)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 896), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 960)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 184320)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1024), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1088), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1152)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 221184)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1216)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1216), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1280)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1280), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1408)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1408), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1472)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1472), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1536)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 294912)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1600)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1600), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1664)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1664), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1728)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 331776)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1792)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1792), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1856)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1856), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1920)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 368640)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1984)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1984), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2048)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2048), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2112)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 405504)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2176)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2176), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2240)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2240), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2304)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 442368)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2368)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2368), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2432)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2432), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2496)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 479232)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2560)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2560), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2624)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2624), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2688)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 516096)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2752)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2752), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2816)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2816), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2880)] = kernel_3[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 552960)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2944)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2944), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 3008)] = kernel_3[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 3008), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 47)]))
         }
       }
     }
-    compute_3: Buffer(compute_2, float32, [25088], [])[((blockIdx.x*392) + threadIdx.x)] = max((conv2d_nchw_1[0] + bias_3: Buffer(bias_2, float32, [512], [])[((blockIdx.x*8) + floordiv(threadIdx.x, 49))]), 0f32)
-    compute_3[(((blockIdx.x*392) + threadIdx.x) + 98)] = max((conv2d_nchw_1[1] + bias_3[(((blockIdx.x*8) + floordiv(threadIdx.x, 49)) + 2)]), 0f32)
-    compute_3[(((blockIdx.x*392) + threadIdx.x) + 196)] = max((conv2d_nchw_1[2] + bias_3[(((blockIdx.x*8) + floordiv(threadIdx.x, 49)) + 4)]), 0f32)
-    compute_3[(((blockIdx.x*392) + threadIdx.x) + 294)] = max((conv2d_nchw_1[3] + bias_3[(((blockIdx.x*8) + floordiv(threadIdx.x, 49)) + 6)]), 0f32)
+    for (i1.inner: int32, 0, 2) {
+      for (i3.inner: int32, 0, 7) {
+        compute_3: Buffer(compute_2, float32, [25088], [])[(((((floordiv(blockIdx.x, 7)*6272) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias_3: Buffer(bias_2, float32, [512], [])[(((floordiv(blockIdx.x, 7)*128) + (threadIdx.x*2)) + i1.inner)]), 0f32)
+      }
+    }
   }
 }
 </pre></div>
@@ -696,7 +1017,7 @@ cooperative fetching, unrolling and operator fusion.</p>
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.280 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.351 ms
 </pre></div>
 </div>
 </div>
@@ -726,35 +1047,35 @@ conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o
 conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
 conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
-conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=2)
-conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=4)
+conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
+conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
+conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
 conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
 conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
+conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
 conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
 conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
-conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
+conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
+conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
 conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=64)
-conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=1)
+conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
 conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
 conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
-conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=3)
-conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
+conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
+conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
 s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2d_nc [...]
 compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
 compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
 compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
-compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=2)
-compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=4)
+compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
+compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
 compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
-compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
+compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
 compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
-compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
+compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
+compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
 compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
 s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
 s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -774,12 +1095,12 @@ s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=98)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
 s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
 pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
 s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=98)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
 s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
 s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 512)
 s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
@@ -799,100 +1120,431 @@ CUDA source code:
   #define int64_t long long
   #define uint64_t unsigned long long
 #endif
-extern &quot;C&quot; __global__ void __launch_bounds__(98) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-  float conv2d_nchw[4];
-  __shared__ float pad_temp_shared[4032];
-  __shared__ float kernel_shared[1536];
+extern &quot;C&quot; __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+  float conv2d_nchw[14];
+  __shared__ float pad_temp_shared[72];
+  __shared__ float kernel_shared[3072];
   conv2d_nchw[0] = 0.000000e+00f;
   conv2d_nchw[1] = 0.000000e+00f;
   conv2d_nchw[2] = 0.000000e+00f;
   conv2d_nchw[3] = 0.000000e+00f;
-  for (int rc_outer_outer = 0; rc_outer_outer &lt; 8; ++rc_outer_outer) {
+  conv2d_nchw[4] = 0.000000e+00f;
+  conv2d_nchw[5] = 0.000000e+00f;
+  conv2d_nchw[6] = 0.000000e+00f;
+  conv2d_nchw[7] = 0.000000e+00f;
+  conv2d_nchw[8] = 0.000000e+00f;
+  conv2d_nchw[9] = 0.000000e+00f;
+  conv2d_nchw[10] = 0.000000e+00f;
+  conv2d_nchw[11] = 0.000000e+00f;
+  conv2d_nchw[12] = 0.000000e+00f;
+  conv2d_nchw[13] = 0.000000e+00f;
+  for (int rc_outer_outer = 0; rc_outer_outer &lt; 64; ++rc_outer_outer) {
     for (int ry_outer_outer = 0; ry_outer_outer &lt; 3; ++ry_outer_outer) {
       __syncthreads();
-      pad_temp_shared[((int)threadIdx.x)] = (((((1 &lt;= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) &amp;&amp; ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((int)threadIdx.x) % 9))) &amp;&amp; ((((int)threadIdx.x) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 98)] = (((((1 &lt;= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 8) % 9))) &amp;&amp; (((((int)threadIdx.x) + 8) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 98) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 196)] = (((((1 &lt;= ((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 7) % 9))) &amp;&amp; (((((int)threadIdx.x) + 7) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 196) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 294)] = (((((1 &lt;= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 6) % 9))) &amp;&amp; (((((int)threadIdx.x) + 6) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 294) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 392)] = (((((1 &lt;= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 5) % 9))) &amp;&amp; (((((int)threadIdx.x) + 5) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 392) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 490)] = (((((1 &lt;= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 4) % 9))) &amp;&amp; (((((int)threadIdx.x) + 4) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 490) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 588)] = (((((1 &lt;= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 3) % 9))) &amp;&amp; (((((int)threadIdx.x) + 3) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 588) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 686)] = (((((1 &lt;= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 2) % 9))) &amp;&amp; (((((int)threadIdx.x) + 2) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 686) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 784)] = (((((1 &lt;= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 1) % 9))) &amp;&amp; (((((int)threadIdx.x) + 1) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 784) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 882)] = (((((1 &lt;= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) &amp;&amp; ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((int)threadIdx.x) % 9))) &amp;&amp; ((((int)threadIdx.x) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) + 678)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 980)] = (((((1 &lt;= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 8) % 9))) &amp;&amp; (((((int)threadIdx.x) + 8) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 980) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1078)] = (((((1 &lt;= ((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 7) % 9))) &amp;&amp; (((((int)threadIdx.x) + 7) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1078) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1176)] = (((((1 &lt;= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 6) % 9))) &amp;&amp; (((((int)threadIdx.x) + 6) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1176) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1274)] = (((((1 &lt;= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 5) % 9))) &amp;&amp; (((((int)threadIdx.x) + 5) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1274) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1372)] = (((((1 &lt;= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 4) % 9))) &amp;&amp; (((((int)threadIdx.x) + 4) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1372) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1470)] = (((((1 &lt;= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 3) % 9))) &amp;&amp; (((((int)threadIdx.x) + 3) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1470) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1568)] = (((((1 &lt;= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 2) % 9))) &amp;&amp; (((((int)threadIdx.x) + 2) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1568) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1666)] = (((((1 &lt;= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 1) % 9))) &amp;&amp; (((((int)threadIdx.x) + 1) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1666) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1764)] = (((((1 &lt;= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) &amp;&amp; ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((int)threadIdx.x) % 9))) &amp;&amp; ((((int)threadIdx.x) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) + 1364)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1862)] = (((((1 &lt;= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 8) % 9))) &amp;&amp; (((((int)threadIdx.x) + 8) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1862) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 1960)] = (((((1 &lt;= ((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 7) % 9))) &amp;&amp; (((((int)threadIdx.x) + 7) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 1960) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 2058)] = (((((1 &lt;= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 6) % 9))) &amp;&amp; (((((int)threadIdx.x) + 6) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2058) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 2156)] = (((((1 &lt;= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 5) % 9))) &amp;&amp; (((((int)threadIdx.x) + 5) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2156) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 2254)] = (((((1 &lt;= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 4) % 9))) &amp;&amp; (((((int)threadIdx.x) + 4) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2254) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 2352)] = (((((1 &lt;= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 3) % 9))) &amp;&amp; (((((int)threadIdx.x) + 3) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2352) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 2450)] = (((((1 &lt;= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 2) % 9))) &amp;&amp; (((((int)threadIdx.x) + 2) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2450) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 2548)] = (((((1 &lt;= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 1) % 9))) &amp;&amp; (((((int)threadIdx.x) + 1) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2548) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 2646)] = (((((1 &lt;= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) &amp;&amp; ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((int)threadIdx.x) % 9))) &amp;&amp; ((((int)threadIdx.x) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) + 2050)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 2744)] = (((((1 &lt;= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 8) % 9))) &amp;&amp; (((((int)threadIdx.x) + 8) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2744) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 2842)] = (((((1 &lt;= ((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 7) % 9))) &amp;&amp; (((((int)threadIdx.x) + 7) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2842) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 2940)] = (((((1 &lt;= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 6) % 9))) &amp;&amp; (((((int)threadIdx.x) + 6) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 2940) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 3038)] = (((((1 &lt;= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 5) % 9))) &amp;&amp; (((((int)threadIdx.x) + 5) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3038) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 3136)] = (((((1 &lt;= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 4) % 9))) &amp;&amp; (((((int)threadIdx.x) + 4) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3136) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 3234)] = (((((1 &lt;= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 3) % 9))) &amp;&amp; (((((int)threadIdx.x) + 3) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3234) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 3332)] = (((((1 &lt;= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 2) % 9))) &amp;&amp; (((((int)threadIdx.x) + 2) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3332) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 3430)] = (((((1 &lt;= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 1) % 9))) &amp;&amp; (((((int)threadIdx.x) + 1) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3430) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 3528)] = (((((1 &lt;= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) &amp;&amp; ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((int)threadIdx.x) % 9))) &amp;&amp; ((((int)threadIdx.x) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) + 2736)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 3626)] = (((((1 &lt;= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 8) % 9))) &amp;&amp; (((((int)threadIdx.x) + 8) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3626) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 3724)] = (((((1 &lt;= ((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 7) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 7) % 9))) &amp;&amp; (((((int)threadIdx.x) + 7) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3724) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 3822)] = (((((1 &lt;= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 6) % 9))) &amp;&amp; (((((int)threadIdx.x) + 6) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3822) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 3920)] = (((((1 &lt;= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 5) % 9))) &amp;&amp; (((((int)threadIdx.x) + 5) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 3920) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
-      if (((int)threadIdx.x) &lt; 14) {
-        pad_temp_shared[(((int)threadIdx.x) + 4018)] = (((((((((int)threadIdx.x) + 49) / 9) + ry_outer_outer) &lt; 8) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 4) % 9))) &amp;&amp; (((((int)threadIdx.x) + 4) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 3136) + (((((int)threadIdx.x) + 4018) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) * 4) % 9))) &amp;&amp; (((((int)threadIdx.x) * 4) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
       }
-      kernel_shared[((int)threadIdx.x)] = kernel[(((((((int)blockIdx.x) * 36864) + (rc_outer_outer * 576)) + ((((int)threadIdx.x) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 98)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 98) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) + 98) % 192) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 196)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 196) / 192) * 4608)) + (rc_outer_outer * 576)) + (((((int)threadIdx.x) + 4) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 294)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 294) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) / 3) + 34) &amp; 63) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 392)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 392) / 192) * 4608)) + (rc_outer_outer * 576)) + (((((int)threadIdx.x) + 8) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 490)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 490) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) + 106) % 192) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 588)] = kernel[(((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 588) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((int)threadIdx.x) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36)];
-      kernel_shared[(((int)threadIdx.x) + 686)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 686) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) + 110) % 192) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 784)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 784) / 192) * 4608)) + (rc_outer_outer * 576)) + (((((int)threadIdx.x) + 16) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 882)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 882) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) / 3) + 38) &amp; 63) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 980)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 980) / 192) * 4608)) + (rc_outer_outer * 576)) + (((((int)threadIdx.x) + 20) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1078)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 1078) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) + 118) % 192) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1176)] = kernel[(((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 1176) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((int)threadIdx.x) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 72)];
-      kernel_shared[(((int)threadIdx.x) + 1274)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 1274) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((((int)threadIdx.x) + 122) % 192) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1372)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 1372) / 192) * 4608)) + (rc_outer_outer * 576)) + (((((int)threadIdx.x) + 28) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      if (((int)threadIdx.x) &lt; 66) {
-        kernel_shared[(((int)threadIdx.x) + 1470)] = kernel[(((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 1470) / 192) * 4608)) + (rc_outer_outer * 576)) + ((((int)threadIdx.x) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 378)];
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 1) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 1) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
       }
-      __syncthreads();
-      for (int rc_inner = 0; rc_inner &lt; 64; ++rc_inner) {
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3))]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7))] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 384)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7))] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 768)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7))] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 1152)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 1)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 385)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 769)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 1153)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 2)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 386)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 770)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((rc_inner * 63) + (((((int)threadIdx.x) % 49) / 7) * 9)) + (((int)threadIdx.x) % 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 49) * 192) + (rc_inner * 3)) + 1154)]));
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 2) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 2) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
       }
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 3) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 3) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
+      }
+      kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
+      kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
+      kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
+      kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
+      kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
+      kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
+      kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
+      kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
+      kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
+      kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
+      kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
+      kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
+      kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
+      kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
+      kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
+      kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      __syncthreads();
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+    }
+  }
+  for (int i1_inner = 0; i1_inner &lt; 2; ++i1_inner) {
+    for (int i3_inner = 0; i3_inner &lt; 7; ++i3_inner) {
+      compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
     }
   }
-  compute[((((int)blockIdx.x) * 392) + ((int)threadIdx.x))] = max((conv2d_nchw[0] + bias[((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 49))]), 0.000000e+00f);
-  compute[(((((int)blockIdx.x) * 392) + ((int)threadIdx.x)) + 98)] = max((conv2d_nchw[1] + bias[(((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 49)) + 2)]), 0.000000e+00f);
-  compute[(((((int)blockIdx.x) * 392) + ((int)threadIdx.x)) + 196)] = max((conv2d_nchw[2] + bias[(((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 49)) + 4)]), 0.000000e+00f);
-  compute[(((((int)blockIdx.x) * 392) + ((int)threadIdx.x)) + 294)] = max((conv2d_nchw[3] + bias[(((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 49)) + 6)]), 0.000000e+00f);
 }
 </pre></div>
 </div>
@@ -928,7 +1580,7 @@ In the example below we resume the status and do more 5 trials.</p>
 Get devices for measurement successfully!
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  41.286 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  46.184 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e3e540f3b477c0c52d8eb73e674e8ffd/tune_conv2d_layer_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_conv2d_layer_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
index 19e3cee3e0..af098dfdd9 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
@@ -916,7 +916,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-   7.8845       7.8834       7.8916       7.8784       0.0055
+   7.8521       7.8516       7.8537       7.8510       0.0011
 </pre></div>
 </div>
 </div>
@@ -938,7 +938,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
 with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>.</p></li>
 </ol>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  0.923 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  3.655 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-cuda-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/eafe360d52540634c9eea0fa89e804bd/tune_network_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
index 672f868d74..acabd58fef 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
@@ -935,7 +935,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  744.5041     744.4052     745.8627     743.2444      1.0712
+  757.8213     756.5935     760.5049     756.3655      1.8999
 </pre></div>
 </div>
 </div>
@@ -957,7 +957,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
 with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>.</p></li>
 </ol>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  30.702 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  33.595 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-x86-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e416b94ca1090b0897c0f6e0df95b911/tune_network_x86.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_x86.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
index 890c7569b3..d83831c078 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
@@ -633,25 +633,27 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
              placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [128, 512], []),
              compute: Buffer(compute_2: Pointer(float32), float32, [128, 512], [])}
   buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute} {
-  for (i0.outer.i1.outer.fused: int32, 0, 2048) &quot;parallel&quot; {
-    allocate(compute_3: Pointer(global float32), float32, [32]), storage_scope = global {
-      for (i.outer.inner: int32, 0, 2) {
+  for (i0.outer.i1.outer.fused: int32, 0, 512) &quot;parallel&quot; {
+    allocate(compute_3: Pointer(global float32), float32, [128]), storage_scope = global {
+      for (i.outer.inner: int32, 0, 8) {
         for (j.init: int32, 0, 16) {
-          compute_4: Buffer(compute_3, float32, [32], [])[((i.outer.inner*16) + j.init)] = 0f32
+          compute_4: Buffer(compute_3, float32, [128], [])[((i.outer.inner*16) + j.init)] = 0f32
         }
         for (elem_idx: int32, 0, let cse_var_1: int32 = floormod(i0.outer.i1.outer.fused, 32) in (placeholder_15: Buffer(placeholder_13, int32, [33], [])[(cse_var_1 + 1)] - placeholder_15[cse_var_1])) {
           for (j: int32, 0, 16) {
             let cse_var_2: int32 = floormod(i0.outer.i1.outer.fused, 32)
             if @tir.likely((elem_idx &lt; (placeholder_15[(cse_var_2 + 1)] - placeholder_15[cse_var_2])), dtype=bool) {
               let cse_var_3: int32 = ((i.outer.inner*16) + j)
-              compute_4[cse_var_3] = (compute_4[cse_var_3] + (placeholder_16: Buffer(placeholder_11, float32, [78656], [])[(((placeholder_15[cse_var_2]*16) + (elem_idx*16)) + j)]*max(placeholder_17: Buffer(placeholder_10, float32, [32768], [])[(((floordiv(i0.outer.i1.outer.fused, 32)*512) + (i.outer.inner*256)) + placeholder_18: Buffer(placeholder_12, int32, [4916], [])[(placeholder_15[cse_var_2] + elem_idx)])], 0f32)))
+              compute_4[cse_var_3] = (compute_4[cse_var_3] + (placeholder_16: Buffer(placeholder_11, float32, [78656], [])[(((placeholder_15[cse_var_2]*16) + (elem_idx*16)) + j)]*max(placeholder_17: Buffer(placeholder_10, float32, [32768], [])[(((floordiv(i0.outer.i1.outer.fused, 32)*2048) + (i.outer.inner*256)) + placeholder_18: Buffer(placeholder_12, int32, [4916], [])[(placeholder_15[cse_var_2] + elem_idx)])], 0f32)))
             }
           }
         }
       }
-      for (i0.inner: int32, 0, 2) {
-        let cse_var_4: int32 = (((floordiv(i0.outer.i1.outer.fused, 32)*1024) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 32)*16))
-        compute_5: Buffer(compute_2, float32, [65536], [])[ramp(cse_var_4, 1, 16)] = max((compute_4[ramp((i0.inner*16), 1, 16)] + placeholder_19: Buffer(placeholder_14, float32, [65536], [])[ramp(cse_var_4, 1, 16)]), broadcast(0f32, 16))
+      for (i0.inner: int32, 0, 8) {
+        for (i1.inner: int32, 0, 16) {
+          let cse_var_4: int32 = ((((floordiv(i0.outer.i1.outer.fused, 32)*4096) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 32)*16)) + i1.inner)
+          compute_5: Buffer(compute_2, float32, [65536], [])[cse_var_4] = max((compute_4[((i0.inner*16) + i1.inner)] + placeholder_19: Buffer(placeholder_14, float32, [65536], [])[cse_var_4]), 0f32)
+        }
       }
     }
   }
@@ -689,7 +691,7 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.901 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.916 ms
 </pre></div>
 </div>
 <div class="admonition note">
diff --git a/docs/how_to/tune_with_autotvm/sg_execution_times.html b/docs/how_to/tune_with_autotvm/sg_execution_times.html
index ea7b677236..71de64c622 100644
--- a/docs/how_to/tune_with_autotvm/sg_execution_times.html
+++ b/docs/how_to/tune_with_autotvm/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autotvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:38.048</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
+<p><strong>00:32.700</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -349,15 +349,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></td>
-<td><p>00:38.012</p></td>
+<td><p>00:32.662</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_relay_x86.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-x86-py"><span class="std std-ref">Auto-tuning a Convolutional Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_x86.py</span></code>)</p></td>
-<td><p>00:00.021</p></td>
+<td><p>00:00.022</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a Convolutional Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></td>
-<td><p>00:00.005</p></td>
+<td><p>00:00.006</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_relay_arm.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a Convolutional Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></td>
diff --git a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
index 84eaf7044b..866730f7c5 100644
--- a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
+++ b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
@@ -690,9 +690,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 16, 4, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6150439
-No: 2   GFLOPS: 18.74/18.74     result: MeasureResult(costs=(0.012354673555555556,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.9697260856628418, timestamp=1673058803.3903506)       [(&#39;tile_f&#39;, [-1, 4, 1, 32]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5303507
-No: 3   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 4, 128]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 2, 32]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,727315
+No: 2   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -814,8 +813,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 4, 16]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 32, 16]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,10012157
-No: 4   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 8, 8]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 16]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9226297
+No: 3   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -937,8 +936,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 128]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 8]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6100591
-No: 5   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 2, 128]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4549373
+No: 4   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1060,8 +1059,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3826906
-No: 6   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 64, 4]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 128, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7575394
+No: 5   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1183,8 +1182,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 1, 512]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 16]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9029239
-No: 7   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 256]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 256, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7420597
+No: 6   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1306,8 +1305,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 32, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,3183625
-No: 8   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 2, 8]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 32, 16]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,3428846
+No: 7   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1429,8 +1428,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 8, 64]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 16, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7371529
-No: 9   GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 256, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 256]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,3281572
+No: 8   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1552,8 +1551,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 32, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 16]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6714222
-No: 10  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 4, 8]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6853151
+No: 9   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1675,8 +1674,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 2, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9279551
-No: 11  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 32, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 64, 8]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2441250
+No: 10  GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1798,26 +1797,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 1, 2]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 64, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5284901
-No: 12  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 142, in build
-    res = future.result()
-  File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 435, in result
-    return self.__get_result()
-  File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 384, in __get_result
-    raise self._exception
-  File &quot;/usr/lib/python3.7/concurrent/futures/thread.py&quot;, line 57, in run
-    result = self.fn(*self.args, **self.kwargs)
-  File &quot;/workspace/python/tvm/contrib/popen_pool.py&quot;, line 432, in &lt;lambda&gt;
-    worker = lambda *args: self._worker_run(*args)
-  File &quot;/workspace/python/tvm/contrib/popen_pool.py&quot;, line 401, in _worker_run
-    return proc.recv()
-  File &quot;/workspace/python/tvm/contrib/popen_pool.py&quot;, line 309, in recv
-    raise TimeoutError()
-TimeoutError
-
-        [(&#39;tile_f&#39;, [-1, 16, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 32, 4]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,8797804
-No: 13  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 16, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 32]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,3432683
+No: 11  GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1939,8 +1920,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 8, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 32]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4597274
-No: 14  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 32, 2]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2236391
+No: 12  GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2062,8 +2043,9 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 128, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 128]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,1537190
-No: 15  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 4, 8]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 128, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,643650
+No: 13  GFLOPS: 9.16/9.16       result: MeasureResult(costs=(0.025283704749999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.974963188171387, timestamp=1673069124.3009748)        [(&#39;tile_f&#39;, [-1, 1, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 64]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3840760
+No: 14  GFLOPS: 0.00/9.16       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2185,8 +2167,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 1, 2]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,5138821
-No: 16  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 64, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,10305593
+No: 15  GFLOPS: 0.00/9.16       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2308,9 +2290,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 2, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 8, 32]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2282863
-No: 17  GFLOPS: 8.08/18.74      result: MeasureResult(costs=(0.02864277625,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.394887685775757, timestamp=1673058819.6129212)       [(&#39;tile_f&#39;, [-1, 2, 4, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 8]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2430456
-No: 18  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 1, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 512]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3674941
+No: 16  GFLOPS: 0.00/9.16       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2432,8 +2413,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 8, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 16]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6125490
-No: 19  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 32, 4]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 128]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4243051
+No: 17  GFLOPS: 0.00/9.16       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2555,8 +2536,9 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 16, 1]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 512, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4097759
-No: 20  GFLOPS: 0.00/18.74      result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 1, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,927466
+No: 18  GFLOPS: 116.19/116.19   result: MeasureResult(costs=(0.001992447982142857,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.663283586502075, timestamp=1673069128.2002482)        [(&#39;tile_f&#39;, [-1, 8, 4, 8]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7206032
+No: 19  GFLOPS: 0.00/116.19     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2678,7 +2660,130 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 256]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7404097
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 2, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 16, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9307654
+No: 20  GFLOPS: 0.00/116.19     result: Traceback (most recent call last):
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
+    func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
+    func = build(s, args, target_host=task.target_host, runtime=runtime)
+  File &quot;/workspace/python/tvm/driver/build_module.py&quot;, line 227, in build
+    input_mod = lower(inputs, args, name=name, binds=binds)
+  File &quot;/workspace/python/tvm/driver/build_module.py&quot;, line 134, in lower
+    return ffi.lower_schedule(inp, args, name, binds, simple_mode)
+  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 331, in tvm._ffi._cy3.core.PackedFuncBase.__call__
+  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 276, in tvm._ffi._cy3.core.FuncCall
+  File &quot;tvm/_ffi/_cython/./base.pxi&quot;, line 181, in tvm._ffi._cy3.core.CHECK_CALL
+tvm._ffi.base.TVMError: Traceback (most recent call last):
+  24: TVMFuncCall
+        at ../src/runtime/c_runtime_api.cc:477
+  23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+        at ../include/tvm/runtime/packed_func.h:1217
+  22: Call
+        at ../include/tvm/runtime/packed_func.h:1213
+  21: operator()
+        at ../include/tvm/runtime/packed_func.h:1730
+  20: unpack_call&lt;tvm::IRModule, 5, tvm::&lt;lambda(tvm::te::Schedule, const tvm::runtime::Array&lt;tvm::runtime::ObjectRef&gt;&amp;, const tvm::runtime::String&amp;, const tvm::runtime::Map&lt;tvm::te::Tensor, tvm::tir::Buffer&gt;&amp;, bool)&gt; &gt;
+        at ../include/tvm/runtime/packed_func.h:1670
+  19: run&lt;&gt;
+        at ../include/tvm/runtime/packed_func.h:1630
+  18: run&lt;tvm::runtime::TVMMovableArgValueWithContext_&gt;
+        at ../include/tvm/runtime/packed_func.h:1630
+  17: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
+        at ../include/tvm/runtime/packed_func.h:1630
+  16: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
+        at ../include/tvm/runtime/packed_func.h:1630
+  15: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
+        at ../include/tvm/runtime/packed_func.h:1630
+  14: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
+        at ../include/tvm/runtime/packed_func.h:1645
+  13: operator()
+        at ../src/driver/driver_api.cc:395
+  12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array&lt;tvm::runtime::ObjectRef, void&gt; const&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, std::unordered_map&lt;tvm::te::Tensor, tvm::tir::Buffer, std::hash&lt;tvm::te::Tensor&gt;, std::equal_to&lt;tvm::te::Tensor&gt;, std::allocator&lt;std::pair&lt;tvm::te::Tensor const, tvm::tir::Buffer&gt; &gt; &gt; const&amp;, tvm::GlobalVarSupply, bool)
+        at ../src/driver/driver_api.cc:381
+  11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array&lt;tvm::transform::Pass, void&gt;)
+        at ../src/driver/driver_api.cc:276
+  10: tvm::transform::Pass::operator()(tvm::IRModule) const
+        at ../src/ir/transform.cc:258
+  9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
+        at ../src/ir/transform.cc:274
+  8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
+        at ../src/ir/transform.cc:454
+  7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
+        at ../src/ir/transform.cc:274
+  6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
+        at ../src/tir/ir/transform.cc:100
+  5: tvm::runtime::TypedPackedFunc&lt;tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)&gt;::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
+        at ../include/tvm/runtime/packed_func.h:1749
+  4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher&lt;tvm::tir::PrimFunc&gt;::run&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::runtime::PackedFunc const&amp;, tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;)
+        at ../include/tvm/runtime/packed_func.h:1693
+  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;) const
+        at ../include/tvm/runtime/packed_func.h:1617
+  2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+        at ../include/tvm/runtime/packed_func.h:1217
+  1: Call
+        at ../include/tvm/runtime/packed_func.h:1213
+  0: operator()
+        at ../src/runtime/c_runtime_api.cc:534
+  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
+    raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
+
+Traceback (most recent call last):
+  24: TVMFuncCall
+        at ../src/runtime/c_runtime_api.cc:477
+  23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+        at ../include/tvm/runtime/packed_func.h:1217
+  22: Call
+        at ../include/tvm/runtime/packed_func.h:1213
+  21: operator()
+        at ../include/tvm/runtime/packed_func.h:1730
+  20: unpack_call&lt;tvm::IRModule, 5, tvm::&lt;lambda(tvm::te::Schedule, const tvm::runtime::Array&lt;tvm::runtime::ObjectRef&gt;&amp;, const tvm::runtime::String&amp;, const tvm::runtime::Map&lt;tvm::te::Tensor, tvm::tir::Buffer&gt;&amp;, bool)&gt; &gt;
+        at ../include/tvm/runtime/packed_func.h:1670
+  19: run&lt;&gt;
+        at ../include/tvm/runtime/packed_func.h:1630
+  18: run&lt;tvm::runtime::TVMMovableArgValueWithContext_&gt;
+        at ../include/tvm/runtime/packed_func.h:1630
+  17: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
+        at ../include/tvm/runtime/packed_func.h:1630
+  16: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
+        at ../include/tvm/runtime/packed_func.h:1630
+  15: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
+        at ../include/tvm/runtime/packed_func.h:1630
+  14: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
+        at ../include/tvm/runtime/packed_func.h:1645
+  13: operator()
+        at ../src/driver/driver_api.cc:395
+  12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array&lt;tvm::runtime::ObjectRef, void&gt; const&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, std::unordered_map&lt;tvm::te::Tensor, tvm::tir::Buffer, std::hash&lt;tvm::te::Tensor&gt;, std::equal_to&lt;tvm::te::Tensor&gt;, std::allocator&lt;std::pair&lt;tvm::te::Tensor const, tvm::tir::Buffer&gt; &gt; &gt; const&amp;, tvm::GlobalVarSupply, bool)
+        at ../src/driver/driver_api.cc:381
+  11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array&lt;tvm::transform::Pass, void&gt;)
+        at ../src/driver/driver_api.cc:276
+  10: tvm::transform::Pass::operator()(tvm::IRModule) const
+        at ../src/ir/transform.cc:258
+  9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
+        at ../src/ir/transform.cc:274
+  8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
+        at ../src/ir/transform.cc:454
+  7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
+        at ../src/ir/transform.cc:274
+  6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
+        at ../src/tir/ir/transform.cc:100
+  5: tvm::runtime::TypedPackedFunc&lt;tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)&gt;::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
+        at ../include/tvm/runtime/packed_func.h:1749
+  4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher&lt;tvm::tir::PrimFunc&gt;::run&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::runtime::PackedFunc const&amp;, tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;)
+        at ../include/tvm/runtime/packed_func.h:1693
+  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;) const
+        at ../include/tvm/runtime/packed_func.h:1617
+  2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+        at ../include/tvm/runtime/packed_func.h:1217
+  1: Call
+        at ../include/tvm/runtime/packed_func.h:1213
+  0: operator()
+        at ../src/runtime/c_runtime_api.cc:534
+  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
+    raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 8, 16]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 256, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6417359
 </pre></div>
 </div>
 <p>Finally we can inspect the best config from log file, check correctness,
@@ -2717,9 +2822,9 @@ and measure running time.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Finish loading 20 records
 
 Best config:
-[(&#39;tile_f&#39;, [-1, 4, 1, 32]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5303507
+[(&#39;tile_f&#39;, [-1, 8, 4, 8]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7206032
 Finish loading 20 records
-Time cost of this operator: 0.011815
+Time cost of this operator: 0.002243
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autotvm-tune-conv2d-cuda-py">
diff --git a/docs/how_to/work_with_microtvm/micro_autotune.html b/docs/how_to/work_with_microtvm/micro_autotune.html
index 01e8c0b066..c5201406dd 100644
--- a/docs/how_to/work_with_microtvm/micro_autotune.html
+++ b/docs/how_to/work_with_microtvm/micro_autotune.html
@@ -663,10 +663,10 @@ the tuned operator.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>########## Build without Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  312.1     98.695   (1, 2, 10, 10, 3)  2       1        [312.1]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.148     0.995    (1, 6, 10, 10)     1       1        [3.148]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.978     0.309    (1, 1, 10, 10, 3)  1       1        [0.978]
-Total_time                                    -                                             316.226   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  339.1     98.737   (1, 2, 10, 10, 3)  2       1        [339.1]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.341     0.973    (1, 6, 10, 10)     1       1        [3.341]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.998     0.291    (1, 1, 10, 10, 3)  1       1        [0.998]
+Total_time                                    -                                             343.439   -        -                  -       -        -
 </pre></div>
 </div>
 </div>
@@ -718,10 +718,10 @@ Total_time                                    -
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>########## Build with Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  103.3     97.519   (1, 6, 10, 10, 1)  2       1        [103.3]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.775     1.676    (1, 6, 10, 10)     1       1        [1.775]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.853     0.805    (1, 3, 10, 10, 1)  1       1        [0.853]
-Total_time                                    -                                             105.928   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  102.8     97.478   (1, 6, 10, 10, 1)  2       1        [102.8]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.806     1.712    (1, 6, 10, 10)     1       1        [1.806]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.854     0.81     (1, 3, 10, 10, 1)  1       1        [0.854]
+Total_time                                    -                                             105.459   -        -                  -       -        -
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-autotune-py">
diff --git a/docs/how_to/work_with_microtvm/micro_pytorch.html b/docs/how_to/work_with_microtvm/micro_pytorch.html
index 864d306a90..0ab7f2daae 100644
--- a/docs/how_to/work_with_microtvm/micro_pytorch.html
+++ b/docs/how_to/work_with_microtvm/micro_pytorch.html
@@ -453,7 +453,7 @@ download a cat image and preprocess it to use as the model input.</p>
 Downloading: &quot;https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2_qnnpack_37f702c5.pth
 
   0%|          | 0.00/3.42M [00:00&lt;?, ?B/s]
-100%|##########| 3.42M/3.42M [00:00&lt;00:00, 153MB/s]
+100%|##########| 3.42M/3.42M [00:00&lt;00:00, 231MB/s]
 /workspace/python/tvm/relay/frontend/pytorch_utils.py:47: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
   return LooseVersion(torch_ver) &gt; ver
 /venv/apache-tvm-py3.7/lib/python3.7/site-packages/setuptools/_distutils/version.py:346: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
@@ -577,7 +577,7 @@ via the host <cite>main.cc`</cite> or if a Zephyr emulated board is selected as
 Torch top-1 id: 282, class name: tiger cat
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  1.972 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  8.786 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-pytorch-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/12b9ecc04c41abaa12022061771821d1/micro_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/micro_train.html b/docs/how_to/work_with_microtvm/micro_train.html
index 18903ffca8..76b060a7bd 100644
--- a/docs/how_to/work_with_microtvm/micro_train.html
+++ b/docs/how_to/work_with_microtvm/micro_train.html
@@ -523,7 +523,7 @@ take about <strong>2 minutes</strong> to download the Stanford Cars, while COCO
 <a href="https://docs.python.org/3/library/shutil.html#shutil.move" title="shutil.move" class="sphx-glr-backref-module-shutil sphx-glr-backref-type-py-function"><span class="n">shutil</span><span class="o">.</span><span class="n">move</span></a><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-typ [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmpa6k21yod/images/random&#39;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmp5h8wk54q/images/random&#39;
 </pre></div>
 </div>
 </div>
@@ -583,8 +583,8 @@ objects to other stuff? We can display some examples from our datasets using <co
     <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s2">&quot;off&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmpa6k21yod/images/target contains 8144 images
-/tmp/tmpa6k21yod/images/random contains 5000 images
+<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmp5h8wk54q/images/target contains 8144 images
+/tmp/tmp5h8wk54q/images/random contains 5000 images
 </pre></div>
 </div>
 </div>
@@ -696,13 +696,13 @@ the time on our validation set).</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Epoch 1/3
-328/328 - 47s - loss: 0.2259 - accuracy: 0.9216 - val_loss: 0.1162 - val_accuracy: 0.9562 - 47s/epoch - 142ms/step
+328/328 - 48s - loss: 0.2220 - accuracy: 0.9247 - val_loss: 0.1724 - val_accuracy: 0.9335 - 48s/epoch - 146ms/step
 Epoch 2/3
-328/328 - 43s - loss: 0.0972 - accuracy: 0.9643 - val_loss: 0.1257 - val_accuracy: 0.9600 - 43s/epoch - 130ms/step
+328/328 - 44s - loss: 0.0987 - accuracy: 0.9647 - val_loss: 0.1322 - val_accuracy: 0.9486 - 44s/epoch - 136ms/step
 Epoch 3/3
-328/328 - 43s - loss: 0.0628 - accuracy: 0.9772 - val_loss: 0.1478 - val_accuracy: 0.9517 - 43s/epoch - 131ms/step
+328/328 - 45s - loss: 0.0816 - accuracy: 0.9699 - val_loss: 0.0834 - val_accuracy: 0.9698 - 45s/epoch - 136ms/step
 
-&lt;keras.callbacks.History object at 0x7f243f09af10&gt;
+&lt;keras.callbacks.History object at 0x7fb9bd706390&gt;
 </pre></div>
 </div>
 </div>
@@ -962,7 +962,7 @@ as intended.</p>
 <p>From here, we could modify the model to read live images from the camera - we have another
 Arduino tutorial for how to do that <a class="reference external" href="https://github.com/guberti/tvm-arduino-demos/tree/master/examples/person_detection">on GitHub</a>. Alternatively, we could also
 <a class="reference external" href="https://tvm.apache.org/docs/how_to/work_with_microtvm/micro_autotune.html">use TVM’s autotuning capabilities</a> to dramatically improve the model’s performance.</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  12.686 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  1.296 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-train-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/b52cec46baf4f78d6bcd94cbe269c8a6/micro_train.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_train.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/sg_execution_times.html b/docs/how_to/work_with_microtvm/sg_execution_times.html
index e8b1e0d4ca..731f3a27b3 100644
--- a/docs/how_to/work_with_microtvm/sg_execution_times.html
+++ b/docs/how_to/work_with_microtvm/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-microtvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>06:16.992</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
+<p><strong>07:15.519</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -349,27 +349,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_train.html#sphx-glr-how-to-work-with-microtvm-micro-train-py"><span class="std std-ref">Training Vision Models for microTVM on Arduino</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_train.py</span></code>)</p></td>
-<td><p>04:12.686</p></td>
+<td><p>05:01.296</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_pytorch.html#sphx-glr-how-to-work-with-microtvm-micro-pytorch-py"><span class="std std-ref">microTVM PyTorch Tutorial</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_pytorch.py</span></code>)</p></td>
-<td><p>01:01.972</p></td>
+<td><p>01:08.786</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_autotune.html#sphx-glr-how-to-work-with-microtvm-micro-autotune-py"><span class="std std-ref">Autotuning with microTVM</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_autotune.py</span></code>)</p></td>
-<td><p>00:50.713</p></td>
+<td><p>00:53.248</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_aot.html#sphx-glr-how-to-work-with-microtvm-micro-aot-py"><span class="std std-ref">microTVM Host-Driven AoT</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_aot.py</span></code>)</p></td>
-<td><p>00:07.872</p></td>
+<td><p>00:08.145</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_tflite.html#sphx-glr-how-to-work-with-microtvm-micro-tflite-py"><span class="std std-ref">microTVM with TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tflite.py</span></code>)</p></td>
-<td><p>00:03.747</p></td>
+<td><p>00:04.042</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_reference_vm.html#sphx-glr-how-to-work-with-microtvm-micro-reference-vm-py"><span class="std std-ref">microTVM Reference Virtual Machines</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_reference_vm.py</span></code>)</p></td>
-<td><p>00:00.001</p></td>
+<td><p>00:00.002</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_ethosu.html#sphx-glr-how-to-work-with-microtvm-micro-ethosu-py"><span class="std std-ref">Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_ethosu.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_relay/sg_execution_times.html b/docs/how_to/work_with_relay/sg_execution_times.html
index 688c7e657a..8e3a77eca3 100644
--- a/docs/how_to/work_with_relay/sg_execution_times.html
+++ b/docs/how_to/work_with_relay/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-relay-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:43.885</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
+<p><strong>00:45.645</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -349,15 +349,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="using_pipeline_executor.html#sphx-glr-how-to-work-with-relay-using-pipeline-executor-py"><span class="std std-ref">Using Pipeline Executor in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_pipeline_executor.py</span></code>)</p></td>
-<td><p>00:32.031</p></td>
+<td><p>00:33.432</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="using_external_lib.html#sphx-glr-how-to-work-with-relay-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></td>
-<td><p>00:10.205</p></td>
+<td><p>00:10.569</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="build_gcn.html#sphx-glr-how-to-work-with-relay-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></td>
-<td><p>00:01.642</p></td>
+<td><p>00:01.637</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="using_relay_viz.html#sphx-glr-how-to-work-with-relay-using-relay-viz-py"><span class="std std-ref">Use Relay Visualizer to Visualize Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_relay_viz.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/intrin_math.html b/docs/how_to/work_with_schedules/intrin_math.html
index 21e26db1d8..26ef0f13d8 100644
--- a/docs/how_to/work_with_schedules/intrin_math.html
+++ b/docs/how_to/work_with_schedules/intrin_math.html
@@ -536,7 +536,7 @@ The following example customizes CUDA lowering rule for <code class="code docuti
 <a href="../../reference/api/python/ir.html#tvm.ir.register_intrin_lowering" title="tvm.ir.register_intrin_lowering" class="sphx-glr-backref-module-tvm-ir sphx-glr-backref-type-py-function"><span class="n">register_intrin_lowering</span></a><span class="p">(</span><span class="s2">&quot;tir.exp&quot;</span><span class="p">,</span> <span class="n">target</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">f</span><span class="o">= [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f246f70cb00&gt;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7fb9b89bc9e0&gt;
 </pre></div>
 </div>
 <p>Register the rule to TVM with override option to override existing rule.
diff --git a/docs/how_to/work_with_schedules/sg_execution_times.html b/docs/how_to/work_with_schedules/sg_execution_times.html
index 36299b0a07..b0a13f2d27 100644
--- a/docs/how_to/work_with_schedules/sg_execution_times.html
+++ b/docs/how_to/work_with_schedules/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-schedules-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:06.869</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
+<p><strong>00:07.617</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -349,23 +349,23 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="intrin_math.html#sphx-glr-how-to-work-with-schedules-intrin-math-py"><span class="std std-ref">Intrinsics and Math Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">intrin_math.py</span></code>)</p></td>
-<td><p>00:04.329</p></td>
+<td><p>00:04.987</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tensorize.html#sphx-glr-how-to-work-with-schedules-tensorize-py"><span class="std std-ref">Use Tensorize to Leverage Hardware Intrinsics</span></a> (<code class="docutils literal notranslate"><span class="pre">tensorize.py</span></code>)</p></td>
-<td><p>00:01.197</p></td>
+<td><p>00:01.226</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="reduction.html#sphx-glr-how-to-work-with-schedules-reduction-py"><span class="std std-ref">Reduction</span></a> (<code class="docutils literal notranslate"><span class="pre">reduction.py</span></code>)</p></td>
-<td><p>00:00.574</p></td>
+<td><p>00:00.593</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="scan.html#sphx-glr-how-to-work-with-schedules-scan-py"><span class="std std-ref">Scan and Recurrent Kernel</span></a> (<code class="docutils literal notranslate"><span class="pre">scan.py</span></code>)</p></td>
-<td><p>00:00.555</p></td>
+<td><p>00:00.579</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="extern_op.html#sphx-glr-how-to-work-with-schedules-extern-op-py"><span class="std std-ref">External Tensor Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">extern_op.py</span></code>)</p></td>
-<td><p>00:00.113</p></td>
+<td><p>00:00.126</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="schedule_primitives.html#sphx-glr-how-to-work-with-schedules-schedule-primitives-py"><span class="std std-ref">Schedule Primitives in TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">schedule_primitives.py</span></code>)</p></td>
@@ -373,11 +373,11 @@
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tedd.html#sphx-glr-how-to-work-with-schedules-tedd-py"><span class="std std-ref">Use Tensor Expression Debug Display (TEDD) for Visualization</span></a> (<code class="docutils literal notranslate"><span class="pre">tedd.py</span></code>)</p></td>
-<td><p>00:00.029</p></td>
+<td><p>00:00.032</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tuple_inputs.html#sphx-glr-how-to-work-with-schedules-tuple-inputs-py"><span class="std std-ref">Compute and Reduce with Tuple Inputs</span></a> (<code class="docutils literal notranslate"><span class="pre">tuple_inputs.py</span></code>)</p></td>
-<td><p>00:00.024</p></td>
+<td><p>00:00.026</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/work_with_schedules/tensorize.html b/docs/how_to/work_with_schedules/tensorize.html
index d9b93889f2..2e7a5a300d 100644
--- a/docs/how_to/work_with_schedules/tensorize.html
+++ b/docs/how_to/work_with_schedules/tensorize.html
@@ -587,7 +587,7 @@ The importing needs to happen before the tensorized GEMV being executed.</p>
              B: Buffer(B_2: Pointer(float32), float32, [512, 64], []),
              C: Buffer(C_2: Pointer(float32), float32, [1024, 512], [])}
   buffer_map = {A_1: A, B_1: B, C_1: C} {
-  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmpzylmsdtf/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpzylmsdtf/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
+  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmpsit_0zwl/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpsit_0zwl/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
   for (i, 0, 1024) {
     for (j.outer: int32, 0, 32) {
       @tir.call_extern(&quot;gemv_update&quot;, @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/install/nnpack.html b/docs/install/nnpack.html
index 1ef28de467..23d2181e9d 100644
--- a/docs/install/nnpack.html
+++ b/docs/install/nnpack.html
@@ -229,7 +229,17 @@
               <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
 <ul class="current">
 <li class="toctree-l1 current"><a class="reference internal" href="index.html">Installing TVM</a><ul class="current">
-<li class="toctree-l2"><a class="reference internal" href="from_source.html">Install from Source</a></li>
+<li class="toctree-l2 current"><a class="reference internal" href="from_source.html">Install from Source</a><ul class="current">
+<li class="toctree-l3"><a class="reference internal" href="from_source.html#developers-get-source-from-github">Developers: Get Source from Github</a></li>
+<li class="toctree-l3"><a class="reference internal" href="from_source.html#build-the-shared-library">Build the Shared Library</a></li>
+<li class="toctree-l3"><a class="reference internal" href="from_source.html#python-package-installation">Python Package Installation</a></li>
+<li class="toctree-l3 current"><a class="reference internal" href="from_source.html#install-contrib-libraries">Install Contrib Libraries</a><ul class="current">
+<li class="toctree-l4 current"><a class="current reference internal" href="#">NNPACK Contrib Installation</a></li>
+</ul>
+</li>
+<li class="toctree-l3"><a class="reference internal" href="from_source.html#enable-c-tests">Enable C++ Tests</a></li>
+</ul>
+</li>
 <li class="toctree-l2"><a class="reference internal" href="docker.html">Docker Images</a></li>
 <li class="toctree-l2 current"><a class="current reference internal" href="#">NNPACK Contrib Installation</a><ul>
 <li class="toctree-l3"><a class="reference internal" href="#conditions">Conditions</a></li>
diff --git a/docs/reference/api/doxygen/classes.html b/docs/reference/api/doxygen/classes.html
index aaa0dda710..a2959e2b15 100644
--- a/docs/reference/api/doxygen/classes.html
+++ b/docs/reference/api/doxygen/classes.html
@@ -119,25 +119,25 @@ $(function() {
 <tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ArrayIterator.html">ArrayIterator</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DictAttrsNode.html">DictAttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Let.html">Let</a> (<a class="el" href="namespacet [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ArrayNode.html">ArrayNode</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDoc.html">DictDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1LetFrame.html">LetFra [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDoc.html">AssertDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDocNode.html">DictDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__bui [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDocNode.html">AssertDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DilateAttrs.html">DilateAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetNode.html">LetNode</a> (<a cla [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AssertFrame.html">AssertFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Dilation2DAttrs.html">Dilation2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1L [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDocNode.html">AssertDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DilateAttrs.html">DilateAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetNode.html">LetNode</a> (<a class [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AssertFrame.html">AssertFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Dilation2DAttrs.html">Dilation2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_ [...]
 </td><td valign="top"><a class="el" href="classtvm_1_1TensorAffineType.html">TensorAffineType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
 <tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AssertFrameNode.html">AssertFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Div.html">Div</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetPattern.html">LetPa [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AssertStmt.html">AssertStmt</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DivNode.html">DivNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetPatternNode.html">LetPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm:: [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AssertStmtNode.html">AssertStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html">Doc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetStmt.html">LetStmt</a> (<a class="el" href="namespace [...]
 </td><td valign="top"><a class="el" href="classtvm_1_1te_1_1TensorComputeOpNode.html">TensorComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td></tr>
 <tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDoc.html">AssignDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1DocNode.html">DocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html"> [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDocNode.html">AssignDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DominatorPattern.html">DominatorPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1support_1_1LinearCongruentialEngi [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMap.html">AttachMap</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DominatorPatternNode.html">DominatorPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDoc.html">Lis [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMapNode.html">AttachMapNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DropoutAttrs.html">DropoutAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDocNode.html">ListDo [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDoc.html">AttrAccessDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1DurationNode.html">DurationNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDocNode.html">AssignDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DominatorPattern.html">DominatorPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1support_1_1LinearCongruentialEngi [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMap.html">AttachMap</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DominatorPatternNode.html">DominatorPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDoc.html">Lis [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMapNode.html">AttachMapNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DropoutAttrs.html">DropoutAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDocNode.html">ListDo [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDoc.html">AttrAccessDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1DurationNode.html">DurationNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1 [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDocNode.html">AttrAccessDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DynExpandDimsAttrs.html">DynExpandDimsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_ [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrDocEntry.html">AttrDocEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_e"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;e&#160;&#160;</div></td></tr></table>
 </td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Load.html">Load</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1RangeNode.html">RangeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1TensorIntrin.html">TensorIntrin</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td></tr>
 <tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrDocVisitor.html">AttrDocVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoadNode.html">LoadNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1RatioNode.html">RatioNode</a> (<a class="el" href="namespace [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1AttrError.html">AttrError</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1EinsumAttrs.html">EinsumAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilder.html">LocalBuilder</a> (<a class="el" href="namespacetvm_1_1auto__scheduler. [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrExistVisitor.html">AttrExistVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1topi_1_1EinsumEquation.html">EinsumEquation</a> (<a class="el" href="namespacetvm_1_1topi.html">tvm::topi</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilderNode.html">LocalBuilderNode</a> ( [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfo.html">AttrFieldInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ElseFrame.html">ElseFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalRunner.html">Local [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrExistVisitor.html">AttrExistVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1topi_1_1EinsumEquation.html">EinsumEquation</a> (<a class="el" href="namespacetvm_1_1topi.html">tvm::topi</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilderNode.html">LocalBuilderNode</a> ( [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfo.html">AttrFieldInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ElseFrame.html">ElseFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalRunner.html">Local [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfoNode.html">AttrFieldInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ElseFrameNode.html">ElseFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalRu [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AttrFrame.html">AttrFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFunc.html">EnvFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoopRV.html">LoopRV</a> (<a class="el" href="na [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AttrFrameNode.html">AttrFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFuncNode.html">EnvFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoopRVNode.html">LoopRVNode</a> [...]
@@ -159,8 +159,8 @@ $(function() {
 <tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSEqualVisitor.html">AttrsSEqualVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor.html">ExprFunctor</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MatrixSetDiagAttrs.html">MatrixSetDiagAttrs</a> (<a class="e [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSHashVisitor.html">AttrsSHashVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html">ExprFunctor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Max.html">Max</a> (<a class="el" href="namespacetvm_1_1tir. [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmt.html">AttrStmt</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmtNode.html">AttrStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrTriggerNonDefaultEntry.html">AttrTriggerNonDefaultEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html">MaxPool2DAttrs</a> (<a [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmtNode.html">AttrStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrTriggerNonDefaultEntry.html">AttrTriggerNonDefaultEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html">MaxPool2DAttrs</a> (<a [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1AttrVisitor.html">AttrVisitor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool3DAttrs.html">MaxPool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::re [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AutoSchedulerLayoutTransformAttrs.html">AutoSchedulerLayoutTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprPattern.html">ExprPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCallback.ht [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool1DAttrs.html">AvgPool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprPatternNode.html">ExprPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureCallback.html">MeasureCallback</a> (<a c [...]
@@ -220,9 +220,9 @@ $(function() {
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferNode.html">BufferNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Fuse.html">Fuse</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1details_1_1Namer.html">Namer</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__bui [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">BufferRealize</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1FuseNode.html">FuseNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NameSupply.html">NameSupply</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160; [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStep.html">FuseStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NameSupplyNode.html">NameSupplyNode</a> (<a class="e [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRegion.html">BufferRegion</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStepNode.html">FuseStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray.html">NDArray</a> (<a class="el" hr [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRegion.html">BufferRegion</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStepNode.html">FuseStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray.html">NDArray</a> (<a class="el" hr [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRegionNode.html">BufferRegionNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_g"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;g&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structtvm_1_1NDArrayContainerTrait.html">NDArrayContainerTrait</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1ScheduleNode.html">ScheduleNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeCallNode.html">TypeCallNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;& [...]
+</td><td valign="top"><a class="el" href="structtvm_1_1NDArrayContainerTrait.html">NDArrayContainerTrait</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ScheduleNode.html">ScheduleNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeCallNode.html">TypeCallNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#16 [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NdarraySizeAttrs.html">NdarraySizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ScheduleRule.html">ScheduleRule</a> (<a class="el" href="n [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html">BufferStoreNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GatherAttrs.html">GatherAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NE.html">NE</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1Builder.html">Builder</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GatherNDAttrs.html">GatherNDAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NENode.html">NENode</a> (<a class="el" href="namesp [...]
@@ -249,8 +249,8 @@ $(function() {
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CallLoweredAttrs.html">CallLoweredAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_h"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;h&#160;&#160;</div></td></tr></table>
 </td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectPtr.html">ObjectPtr</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1SelectSHashReduce.html">SelectSHashReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1TypePattern.html">TypePattern</a> (<a class="el" href [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectPtrEqual.html">ObjectPtrEqual</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1SelectSHashReduce_3_01T_00_01TraitName_00_01false_01_4.html">SelectSH [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html">SimpleObjAllocator::Handler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectPtrHash.html">ObjectPtrHash</ [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPattern.html">CallPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html">SEqualReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectRef.html">ObjectRef</a> (<a class="el" href="namespacetvm_1_1runt [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html">SEqualReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectPtrHash.html">ObjectPtrHash</a> (<a class="el" href="namespacetvm_1_1r [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPattern.html">CallPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html">SimpleObjAllocator::Handler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectRef.html">ObjectRef</a>  [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPatternNode.html">CallPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html">SHashReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker.html">ObjectTypeChecker</a> (<a class="el" hre [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1CanonicalSimplifier.html">CanonicalSimplifier</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDataType_01_4.html">Handler&lt; DLDataType &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1Ob [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Cast.html">Cast</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDevice_01_4.html">Handler&lt; DLDevice &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker_3_01Map_3_01K_00_01V_01_ [...]
@@ -279,8 +279,8 @@ $(function() {
 <tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAG.html">ComputeDAG</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IfThenElseNode.html">IfThenElseNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_p"></a><table border="0" cellspacing="0" cellpadding="0 [...]
 </td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Singleton.html">Singleton</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1VarDefFrameNode.html">VarDefFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td></tr>
 <tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAGNode.html">ComputeDAGNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce.html">ImplSEqualReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1SingletonNode.html">Single [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStep.html">ComputeInlineStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce_3_01T_00_01true_01_4.html">ImplSEqualReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href=" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStepNode.html">ComputeInlineStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce.html">ImplSHashReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1PackedFun [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStep.html">ComputeInlineStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce_3_01T_00_01true_01_4.html">ImplSEqualReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href=" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStepNode.html">ComputeInlineStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce.html">ImplSHashReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1PackedFun [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1ComputeOp.html">ComputeOp</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce_3_01T_00_01true_01_4.html">ImplSHashReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1PackedFuncSubObj.html">PackedFunc [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1ComputeOpNode.html">ComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs.html">ImplVisitAttrs</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html">PackedFuncValueConverter</a> (<a  [...]
 <tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeRootStep.html">ComputeRootStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs_3_01T_00_01true_01_4.html">ImplVisitAttrs&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtv [...]
@@ -314,12 +314,12 @@ $(function() {
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv1DTransposeAttrs.html">Conv1DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IntImm.html">IntImm</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html">PatternWildcard</a> (<a class="el" href="namespacetvm_1_1relay.htm [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DAttrs.html">Conv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IntImmNode.html">IntImmNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternWildcardNode.html">PatternWildcardNode</a> (<a class="el" href="namespacetvm_1_1relay.html" [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DTransposeAttrs.html">Conv2DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSet.html">IntSet</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1PercentNode.html">PercentNode</a> (<a class="el" [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradAttrs.html">Conv2DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetAnalyzer.html">IntSetAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1PlaceholderOp.html">PlaceholderOp</a> (<a class="el" [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradAttrs.html">Conv2DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetAnalyzer.html">IntSetAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1PlaceholderOp.html">PlaceholderOp</a> (<a class="el" [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradNNPACKWeightTransformAttrs.html">Conv2DWinogradNNPACKWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetNode.html">IntSetNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1PlaceholderOpNode. [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradNNPACKWeightTransformAttrs.html">Conv2DWinogradNNPACKWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetNode.html">IntSetNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1PlaceholderOpNode. [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DAttrs.html">Conv3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilder.html">IRBuilder</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1PointerType.html">PointerType</a> (<a c [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DTransposeAttrs.html">Conv3DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html">IRBuilderFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1PointerType [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DWinogradAttrs.html">Conv3DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html">IRBuilderFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1 [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DTransposeAttrs.html">Conv3DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html">IRBuilderFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1PointerType [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DWinogradAttrs.html">Conv3DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html">IRBuilderFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1 [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvGemmWeightTransformAttrs.html">ConvGemmWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderNode.html">IRBuilderNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvWinogradWeightTransformAttrs.html">ConvWinogradWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifier.html">IRDocsifier</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Pool [...]
 <tr><td></td><td></td><td></td><td></td><td></td></tr>
diff --git a/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef.html b/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef.html
index b53904790b..3fb04c8dea 100644
--- a/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef.html
+++ b/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef.html
@@ -81,7 +81,7 @@ $(function() {
 
 <p><code>#include &lt;<a class="el" href="object_8h_source.html">object.h</a>&gt;</code></p>
 
-<p>Inherited by <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; Range &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; Region &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; T &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; tvm::arith::IterSplitExpr &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt;  [...]
+<p>Inherited by <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; Range &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; Region &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; T &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; tvm::arith::IterSplitExpr &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt;  [...]
 <div class="dynheader">
 Collaboration diagram for tvm::runtime::ObjectRef:</div>
 <div class="dyncontent">
diff --git a/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef__coll__graph.svg b/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef__coll__graph.svg
index a5c96703b9..aa6113fd16 100644
--- a/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef__coll__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef__coll__graph.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 453)">
 <title>tvm::runtime::ObjectRef</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-453 144,-453 144,4 -4,4"/>
-<!-- Node489 -->
+<!-- Node488 -->
 <g id="node1" class="node">
-<title>Node489</title>
+<title>Node488</title>
 <polygon fill="#bfbfbf" stroke="#000000" points="3,-.5 3,-222.5 137,-222.5 137,-.5 3,-.5"/>
 <text text-anchor="middle" x="70" y="-210.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectRef</text>
 <polyline fill="none" stroke="#000000" points="3,-203.5 137,-203.5 "/>
@@ -34,9 +34,9 @@
 <text text-anchor="start" x="11" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># FFIClearAfterMove()</text>
 <text text-anchor="start" x="11" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># GetDataPtr()</text>
 </g>
-<!-- Node490 -->
+<!-- Node489 -->
 <g id="node2" class="node">
-<title>Node490</title>
+<title>Node489</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1runtime_1_1ObjectPtr.html" target="_top" xlink:title="{tvm::runtime::ObjectPtr\l\&lt; tvm::runtime::Object \&gt;\n||+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ~ObjectPtr()\l+ swap()\l+ get()\l+ operator&#45;\&gt;()\land 11 more...\l}">
 <polygon fill="#ffffff" stroke="#000000" points="0,-270.5 0,-448.5 140,-448.5 140,-270.5 0,-270.5"/>
 <text text-anchor="start" x="8" y="-436.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectPtr</text>
@@ -58,9 +58,9 @@
 </a>
 </g>
 </g>
-<!-- Node490&#45;&gt;Node489 -->
+<!-- Node489&#45;&gt;Node488 -->
 <g id="edge1" class="edge">
-<title>Node490&#45;&gt;Node489</title>
+<title>Node489&#45;&gt;Node488</title>
 <path fill="none" stroke="#404040" d="M70,-270.3167C70,-258.8765 70,-247.0062 70,-235.1402"/>
 <polygon fill="none" stroke="#404040" points="70.0001,-234.7944 66,-228.7944 70,-222.7944 74,-228.7943 70.0001,-234.7944"/>
 <text text-anchor="middle" x="89.5" y="-244" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #data_</text>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer-members.html b/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer-members.html
index 25acc192a2..515043ce4a 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer-members.html
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer-members.html
@@ -73,7 +73,9 @@ $(function() {
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a8dfe0e96ba9d3b4cd08e57c20c1eb414">CopyOnWrite</a>(const TNode *node)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a4b60203572648ecb12a2aa72a552318d">ivmap_</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a18e378023df3122893ffaf50bb89464e">operator()</a>(Stmt stmt)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtExprMutator.html#afc4dedb17cb5f2057fc8200e6c298fab">VisitExpr</a>(const PrimExpr &amp;e) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtExprMutator.html">tvm::tir::StmtExprMutator</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">protected</span><span class="mlabel">virtual</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1de0d50699d7d15618b19d2cb7fadb35">var_remap_</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtExprMutator.html#afc4dedb17cb5f2057fc8200e6c298fab">VisitExpr</a>(const PrimExpr &amp;e) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtExprMutator.html">tvm::tir::StmtExprMutator</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">protected</span><span class="mlabel">virtual</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a947d190184b5f3d1e406216909a2cf18">VisitExpr_</a>(const VarNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a3c9ea744377f09c9656cc3718eed7bfb">VisitExpr_</a>(const SelectNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#abba02eaed632f7b46d078e087f0b4217">VisitExpr_</a>(const RampNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#ace95c42561653586686d4e810b3f6760">VisitExpr_</a>(const AddNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
@@ -93,43 +95,42 @@ $(function() {
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a3bee1a8d9a9fd0ab7336810682919787">VisitExpr_</a>(const GENode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#aa78adca6635007e175e6b17a4a758d40">VisitExpr_</a>(const CallNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a263428d455e3991968778c002d2d875c">VisitExpr_</a>(const CastNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#af6e722273818f70d0894aeecd1b55615">tvm::tir::StmtExprMutator::VisitExpr_</a>(const VarNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#ade08e1786bce8d1cb220d3f54f3fbfeb">tvm::tir::StmtExprMutator::VisitExpr_</a>(const SizeVarNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#af195e0234cba3ed03d36b04a03d3e0e0">tvm::tir::StmtExprMutator::VisitExpr_</a>(const LoadNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#ad98f4375807adf56f3bbb47ee095e5e2">tvm::tir::StmtExprMutator::VisitExpr_</a>(const BufferLoadNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a755a2639a6147fb6a608e8904804bb5f">tvm::tir::StmtExprMutator::VisitExpr_</a>(const ProducerLoadNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#ae1c5c9652137783b1d0636b2837143aa">tvm::tir::StmtExprMutator::VisitExpr_</a>(const LetNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#ac8f8e578e5a375dc472b822736e5a78b">tvm::tir::StmtExprMutator::VisitExpr_</a>(const AndNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a67c6648dbec499b4c3bb825b6610b2c7">tvm::tir::StmtExprMutator::VisitExpr_</a>(const OrNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a54677bf1b2cec622c781ba6ed4e03af9">tvm::tir::StmtExprMutator::VisitExpr_</a>(const ReduceNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#afbd5d1454529710c5ab5ddb17eca75ab">tvm::tir::StmtExprMutator::VisitExpr_</a>(const NotNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a26ea1a3a8efcf9319349533ba3e69456">tvm::tir::StmtExprMutator::VisitExpr_</a>(const BroadcastNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a8e7e4d5ed12604f8c6019dd6d7cf93b8">tvm::tir::StmtExprMutator::VisitExpr_</a>(const ShuffleNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a46b43325d9ad8cfe09818ba1dda4b46f">tvm::tir::StmtExprMutator::VisitExpr_</a>(const IntImmNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#aa0156d62a4bf1cf57ffab381125bfd6b">tvm::tir::StmtExprMutator::VisitExpr_</a>(const FloatImmNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#abe827bb16d1930225186627791cea131">tvm::tir::StmtExprMutator::VisitExpr_</a>(const StringImmNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#afa46554fba071aae744164711b6112ae">tvm::tir::StmtExprMutator::VisitExpr_</a>(const AnyNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a1e24adea3e49d9381246b376349db51f">VisitSeqStmt_</a>(const SeqStmtNode *op, bool flatten_before_visit, std::function&lt; Stmt(const Stmt &amp;)&gt; fmutate=nullptr)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a4306d1beba05fa1ac582503498a6d7ce">VisitStmt</a>(const Stmt &amp;stmt) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#abd51f9d25b5e2419034f028b17aded2a">VisitStmt_</a>(const ForNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#aca20c14f1f058ffb3ef6c8bba1fda6f1">VisitStmt_</a>(const AttrStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#acab424302ee742c9759da1696519496a">VisitStmt_</a>(const BlockRealizeNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a74a17d6246a62100e699dec154ceef72">VisitStmt_</a>(const BlockNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#ade08e1786bce8d1cb220d3f54f3fbfeb">tvm::tir::StmtExprMutator::VisitExpr_</a>(const SizeVarNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#af195e0234cba3ed03d36b04a03d3e0e0">tvm::tir::StmtExprMutator::VisitExpr_</a>(const LoadNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#ad98f4375807adf56f3bbb47ee095e5e2">tvm::tir::StmtExprMutator::VisitExpr_</a>(const BufferLoadNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a755a2639a6147fb6a608e8904804bb5f">tvm::tir::StmtExprMutator::VisitExpr_</a>(const ProducerLoadNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#ae1c5c9652137783b1d0636b2837143aa">tvm::tir::StmtExprMutator::VisitExpr_</a>(const LetNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#ac8f8e578e5a375dc472b822736e5a78b">tvm::tir::StmtExprMutator::VisitExpr_</a>(const AndNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a67c6648dbec499b4c3bb825b6610b2c7">tvm::tir::StmtExprMutator::VisitExpr_</a>(const OrNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a54677bf1b2cec622c781ba6ed4e03af9">tvm::tir::StmtExprMutator::VisitExpr_</a>(const ReduceNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#afbd5d1454529710c5ab5ddb17eca75ab">tvm::tir::StmtExprMutator::VisitExpr_</a>(const NotNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a26ea1a3a8efcf9319349533ba3e69456">tvm::tir::StmtExprMutator::VisitExpr_</a>(const BroadcastNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a8e7e4d5ed12604f8c6019dd6d7cf93b8">tvm::tir::StmtExprMutator::VisitExpr_</a>(const ShuffleNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a46b43325d9ad8cfe09818ba1dda4b46f">tvm::tir::StmtExprMutator::VisitExpr_</a>(const IntImmNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#aa0156d62a4bf1cf57ffab381125bfd6b">tvm::tir::StmtExprMutator::VisitExpr_</a>(const FloatImmNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#abe827bb16d1930225186627791cea131">tvm::tir::StmtExprMutator::VisitExpr_</a>(const StringImmNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#afa46554fba071aae744164711b6112ae">tvm::tir::StmtExprMutator::VisitExpr_</a>(const AnyNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a1e24adea3e49d9381246b376349db51f">VisitSeqStmt_</a>(const SeqStmtNode *op, bool flatten_before_visit, std::function&lt; Stmt(const Stmt &amp;)&gt; fmutate=nullptr)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a4306d1beba05fa1ac582503498a6d7ce">VisitStmt</a>(const Stmt &amp;stmt) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">inline</span><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#abd51f9d25b5e2419034f028b17aded2a">VisitStmt_</a>(const ForNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#aca20c14f1f058ffb3ef6c8bba1fda6f1">VisitStmt_</a>(const AttrStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#acab424302ee742c9759da1696519496a">VisitStmt_</a>(const BlockRealizeNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a74a17d6246a62100e699dec154ceef72">VisitStmt_</a>(const BlockNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a87e951cbdf97e52218f21db3fbbfaf38">VisitStmt_</a>(const LetStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a7bc8fad1381862ce012819bd4762a1b6">tvm::tir::StmtExprMutator::VisitStmt_</a>(const IfThenElseNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a51d66e88ec3779c117bab5ba9406091c">tvm::tir::StmtExprMutator::VisitStmt_</a>(const LetStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a971adb4c88808a67317da73954e093b5">tvm::tir::StmtExprMutator::VisitStmt_</a>(const WhileNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#ab094e88d4bda5882756d136c15db6260">tvm::tir::StmtExprMutator::VisitStmt_</a>(const AllocateNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#ade45b66284e1bc514afeb45bfd645b1f">tvm::tir::StmtExprMutator::VisitStmt_</a>(const AllocateConstNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a741a1278db48ed1bb552a5286d26e564">tvm::tir::StmtExprMutator::VisitStmt_</a>(const DeclBufferNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a3b116212aaf79bc898f3446a35f7fd3e">tvm::tir::StmtExprMutator::VisitStmt_</a>(const StoreNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#abea492fd2fee9442cc1995f46ef7fd60">tvm::tir::StmtExprMutator::VisitStmt_</a>(const BufferStoreNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#aaf5a2292da207073d637cc4d1a0704ce">tvm::tir::StmtExprMutator::VisitStmt_</a>(const BufferRealizeNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#aa3a94c8b418881f10d855592b847c2e7">tvm::tir::StmtExprMutator::VisitStmt_</a>(const AssertStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#ac08e5c1f548566668b547e7107b99adb">tvm::tir::StmtExprMutator::VisitStmt_</a>(const ProducerStoreNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#afda082108ac22091230cb937086e7a58">tvm::tir::StmtExprMutator::VisitStmt_</a>(const ProducerRealizeNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#aecd16bf1a6715ea36f6c30e5dc2ceae7">tvm::tir::StmtExprMutator::VisitStmt_</a>(const PrefetchNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a60b18d6d6bfcb692ab4a369465a175a3">tvm::tir::StmtExprMutator::VisitStmt_</a>(const SeqStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a2ec423a8f109916abf02ac463308f58a">tvm::tir::StmtExprMutator::VisitStmt_</a>(const EvaluateNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a971adb4c88808a67317da73954e093b5">tvm::tir::StmtExprMutator::VisitStmt_</a>(const WhileNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#ab094e88d4bda5882756d136c15db6260">tvm::tir::StmtExprMutator::VisitStmt_</a>(const AllocateNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#ade45b66284e1bc514afeb45bfd645b1f">tvm::tir::StmtExprMutator::VisitStmt_</a>(const AllocateConstNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a741a1278db48ed1bb552a5286d26e564">tvm::tir::StmtExprMutator::VisitStmt_</a>(const DeclBufferNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a3b116212aaf79bc898f3446a35f7fd3e">tvm::tir::StmtExprMutator::VisitStmt_</a>(const StoreNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#abea492fd2fee9442cc1995f46ef7fd60">tvm::tir::StmtExprMutator::VisitStmt_</a>(const BufferStoreNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#aaf5a2292da207073d637cc4d1a0704ce">tvm::tir::StmtExprMutator::VisitStmt_</a>(const BufferRealizeNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#aa3a94c8b418881f10d855592b847c2e7">tvm::tir::StmtExprMutator::VisitStmt_</a>(const AssertStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#ac08e5c1f548566668b547e7107b99adb">tvm::tir::StmtExprMutator::VisitStmt_</a>(const ProducerStoreNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#afda082108ac22091230cb937086e7a58">tvm::tir::StmtExprMutator::VisitStmt_</a>(const ProducerRealizeNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#aecd16bf1a6715ea36f6c30e5dc2ceae7">tvm::tir::StmtExprMutator::VisitStmt_</a>(const PrefetchNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a60b18d6d6bfcb692ab4a369465a175a3">tvm::tir::StmtExprMutator::VisitStmt_</a>(const SeqStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a2ec423a8f109916abf02ac463308f58a">tvm::tir::StmtExprMutator::VisitStmt_</a>(const EvaluateNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
 </table></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer.html b/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer.html
index 00bd07a991..e8822b403c 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer.html
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer.html
@@ -84,7 +84,7 @@ Inheritance diagram for tvm::tir::DataTypeLegalizer:</div>
 <div class="dynheader">
 Collaboration diagram for tvm::tir::DataTypeLegalizer:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1DataTypeLegalizer__coll__graph.svg" width="438" height="787"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1DataTypeLegalizer__coll__graph.svg" width="438" height="802"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <table class="memberdecls">
@@ -98,6 +98,10 @@ Protected Member Functions</h2></td></tr>
 <tr class="separator:acab424302ee742c9759da1696519496a"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a74a17d6246a62100e699dec154ceef72"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a74a17d6246a62100e699dec154ceef72">VisitStmt_</a> (const <a class="el" href="classtvm_1_1tir_1_1BlockNode.html">BlockNode</a> *op) override</td></tr>
 <tr class="separator:a74a17d6246a62100e699dec154ceef72"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a87e951cbdf97e52218f21db3fbbfaf38"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a87e951cbdf97e52218f21db3fbbfaf38">VisitStmt_</a> (const <a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html">LetStmtNode</a> *op) override</td></tr>
+<tr class="separator:a87e951cbdf97e52218f21db3fbbfaf38"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a947d190184b5f3d1e406216909a2cf18"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a947d190184b5f3d1e406216909a2cf18">VisitExpr_</a> (const <a class="el" href="classtvm_1_1tir_1_1VarNode.html">VarNode</a> *op) override</td></tr>
+<tr class="separator:a947d190184b5f3d1e406216909a2cf18"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a3c9ea744377f09c9656cc3718eed7bfb"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a3c9ea744377f09c9656cc3718eed7bfb">VisitExpr_</a> (const <a class="el" href="classtvm_1_1tir_1_1SelectNode.html">SelectNode</a> *op) override</td></tr>
 <tr class="separator:a3c9ea744377f09c9656cc3718eed7bfb"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:abba02eaed632f7b46d078e087f0b4217"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#abba02eaed632f7b46d078e087f0b4217">VisitExpr_</a> (const <a class="el" href="classtvm_1_1tir_1_1RampNode.html">RampNode</a> *op) override</td></tr>
@@ -265,6 +269,8 @@ Protected Member Functions</h2></td></tr>
 Protected Attributes</h2></td></tr>
 <tr class="memitem:a4b60203572648ecb12a2aa72a552318d"><td class="memItemLeft" align="right" valign="top">std::unordered_map&lt; const <a class="el" href="classtvm_1_1tir_1_1IterVarNode.html">IterVarNode</a> *, <a class="el" href="classtvm_1_1tir_1_1IterVar.html">IterVar</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a4b60203572648ecb12a2aa72a552318d">ivmap_</a></td></tr>
 <tr class="separator:a4b60203572648ecb12a2aa72a552318d"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a1de0d50699d7d15618b19d2cb7fadb35"><td class="memItemLeft" align="right" valign="top">std::unordered_map&lt; const <a class="el" href="classtvm_1_1tir_1_1VarNode.html">VarNode</a> *, <a class="el" href="classtvm_1_1tir_1_1Var.html">Var</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1de0d50699d7d15618b19d2cb7fadb35">var_remap_</a></td></tr>
+<tr class="separator:a1de0d50699d7d15618b19d2cb7fadb35"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="inherit_header pro_attribs_classtvm_1_1tir_1_1StmtMutator"><td colspan="2" onclick="javascript:toggleInherit('pro_attribs_classtvm_1_1tir_1_1StmtMutator')"><img src="closed.png" alt="-"/>&#160;Protected Attributes inherited from <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td></tr>
 <tr class="memitem:a620e6041832441d25ee4f4d65921231f inherit pro_attribs_classtvm_1_1tir_1_1StmtMutator"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a620e6041832441d25ee4f4d65921231f">allow_copy_on_write_</a> {false}</td></tr>
 <tr class="memdesc:a620e6041832441d25ee4f4d65921231f inherit pro_attribs_classtvm_1_1tir_1_1StmtMutator"><td class="mdescLeft">&#160;</td><td class="mdescRight">Internal state to indicate whether copy on write is enabled. COW is enabled iff all the parents of the node are unique.  <a href="classtvm_1_1tir_1_1StmtMutator.html#a620e6041832441d25ee4f4d65921231f">More...</a><br /></td></tr>
@@ -286,8 +292,34 @@ Additional Inherited Members</h2></td></tr>
 </ul>
 <p>Usually we enforce the consistency of data types when constructing the IR nodes. However, such inconsistency may happen as a result of IR mutation in some passes. This class can be used as base class of such passes to ensure the consistency of data types. </p>
 </div><h2 class="groupheader">Member Function Documentation</h2>
+<a id="a947d190184b5f3d1e406216909a2cf18"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a947d190184b5f3d1e406216909a2cf18">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[1/20]</span></h2>
+
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::tir::DataTypeLegalizer::VisitExpr_ </td>
+          <td>(</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1tir_1_1VarNode.html">VarNode</a> *&#160;</td>
+          <td class="paramname"><em>op</em></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">override</span><span class="mlabel">protected</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
+</div>
+</div>
 <a id="a3c9ea744377f09c9656cc3718eed7bfb"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a3c9ea744377f09c9656cc3718eed7bfb">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[1/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a3c9ea744377f09c9656cc3718eed7bfb">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[2/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -313,7 +345,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="abba02eaed632f7b46d078e087f0b4217"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#abba02eaed632f7b46d078e087f0b4217">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[2/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#abba02eaed632f7b46d078e087f0b4217">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[3/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -339,7 +371,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="ace95c42561653586686d4e810b3f6760"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#ace95c42561653586686d4e810b3f6760">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[3/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#ace95c42561653586686d4e810b3f6760">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[4/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -365,7 +397,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="a7faae5c6746c2911d3c4d82f4b0802cf"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a7faae5c6746c2911d3c4d82f4b0802cf">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[4/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a7faae5c6746c2911d3c4d82f4b0802cf">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[5/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -391,7 +423,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="a1f08a5698666b1a98455ef75c1d8a434"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a1f08a5698666b1a98455ef75c1d8a434">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[5/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a1f08a5698666b1a98455ef75c1d8a434">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[6/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -417,7 +449,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="a5bad1d0823b0e51fa7c5dbef34f49833"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a5bad1d0823b0e51fa7c5dbef34f49833">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[6/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a5bad1d0823b0e51fa7c5dbef34f49833">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[7/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -443,7 +475,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="ac1e09304dc701e922244e1a4587be114"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#ac1e09304dc701e922244e1a4587be114">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[7/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#ac1e09304dc701e922244e1a4587be114">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[8/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -469,7 +501,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="a622799ab2b39ebdb6087534f4aab6d2f"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a622799ab2b39ebdb6087534f4aab6d2f">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[8/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a622799ab2b39ebdb6087534f4aab6d2f">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[9/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -495,7 +527,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="ae8bfafc2f1bd2568acefa9b984caeb85"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#ae8bfafc2f1bd2568acefa9b984caeb85">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[9/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#ae8bfafc2f1bd2568acefa9b984caeb85">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[10/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -521,7 +553,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="af2a2d31de7d1325c00f923aff28a5904"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#af2a2d31de7d1325c00f923aff28a5904">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[10/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#af2a2d31de7d1325c00f923aff28a5904">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[11/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -547,7 +579,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="a69fc10196783329cd24628db8d29cab6"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a69fc10196783329cd24628db8d29cab6">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[11/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a69fc10196783329cd24628db8d29cab6">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[12/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -573,7 +605,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="a1503fb6579a078beadfccbe1eba1b717"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a1503fb6579a078beadfccbe1eba1b717">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[12/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a1503fb6579a078beadfccbe1eba1b717">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[13/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -599,7 +631,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="a9a526030daa59f557991710eebbcd713"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a9a526030daa59f557991710eebbcd713">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[13/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a9a526030daa59f557991710eebbcd713">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[14/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -625,7 +657,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="aedf84c8c4df1f4c451d41d9c9b8fa266"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#aedf84c8c4df1f4c451d41d9c9b8fa266">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[14/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#aedf84c8c4df1f4c451d41d9c9b8fa266">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[15/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -651,7 +683,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="ad56676b279c7602022e9515564ab746d"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#ad56676b279c7602022e9515564ab746d">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[15/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#ad56676b279c7602022e9515564ab746d">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[16/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -677,7 +709,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="ad6f857beadbb88b9db1c5bafaac59c8c"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#ad6f857beadbb88b9db1c5bafaac59c8c">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[16/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#ad6f857beadbb88b9db1c5bafaac59c8c">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[17/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -703,7 +735,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="a3bee1a8d9a9fd0ab7336810682919787"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a3bee1a8d9a9fd0ab7336810682919787">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[17/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a3bee1a8d9a9fd0ab7336810682919787">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[18/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -729,7 +761,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="aa78adca6635007e175e6b17a4a758d40"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#aa78adca6635007e175e6b17a4a758d40">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[18/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#aa78adca6635007e175e6b17a4a758d40">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[19/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -755,7 +787,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="a263428d455e3991968778c002d2d875c"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a263428d455e3991968778c002d2d875c">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[19/19]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a263428d455e3991968778c002d2d875c">&#9670;&nbsp;</a></span>VisitExpr_() <span class="overload">[20/20]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -781,7 +813,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="abd51f9d25b5e2419034f028b17aded2a"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#abd51f9d25b5e2419034f028b17aded2a">&#9670;&nbsp;</a></span>VisitStmt_() <span class="overload">[1/4]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#abd51f9d25b5e2419034f028b17aded2a">&#9670;&nbsp;</a></span>VisitStmt_() <span class="overload">[1/5]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -807,7 +839,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="aca20c14f1f058ffb3ef6c8bba1fda6f1"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#aca20c14f1f058ffb3ef6c8bba1fda6f1">&#9670;&nbsp;</a></span>VisitStmt_() <span class="overload">[2/4]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#aca20c14f1f058ffb3ef6c8bba1fda6f1">&#9670;&nbsp;</a></span>VisitStmt_() <span class="overload">[2/5]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -833,7 +865,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="acab424302ee742c9759da1696519496a"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#acab424302ee742c9759da1696519496a">&#9670;&nbsp;</a></span>VisitStmt_() <span class="overload">[3/4]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#acab424302ee742c9759da1696519496a">&#9670;&nbsp;</a></span>VisitStmt_() <span class="overload">[3/5]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -859,7 +891,7 @@ Additional Inherited Members</h2></td></tr>
 </div>
 </div>
 <a id="a74a17d6246a62100e699dec154ceef72"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a74a17d6246a62100e699dec154ceef72">&#9670;&nbsp;</a></span>VisitStmt_() <span class="overload">[4/4]</span></h2>
+<h2 class="memtitle"><span class="permalink"><a href="#a74a17d6246a62100e699dec154ceef72">&#9670;&nbsp;</a></span>VisitStmt_() <span class="overload">[4/5]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -882,6 +914,32 @@ Additional Inherited Members</h2></td></tr>
 </table>
 </div><div class="memdoc">
 
+</div>
+</div>
+<a id="a87e951cbdf97e52218f21db3fbbfaf38"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a87e951cbdf97e52218f21db3fbbfaf38">&#9670;&nbsp;</a></span>VisitStmt_() <span class="overload">[5/5]</span></h2>
+
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a> tvm::tir::DataTypeLegalizer::VisitStmt_ </td>
+          <td>(</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html">LetStmtNode</a> *&#160;</td>
+          <td class="paramname"><em>op</em></td><td>)</td>
+          <td></td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">override</span><span class="mlabel">protected</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
 </div>
 </div>
 <h2 class="groupheader">Member Data Documentation</h2>
@@ -905,6 +963,28 @@ Additional Inherited Members</h2></td></tr>
 </table>
 </div><div class="memdoc">
 
+</div>
+</div>
+<a id="a1de0d50699d7d15618b19d2cb7fadb35"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a1de0d50699d7d15618b19d2cb7fadb35">&#9670;&nbsp;</a></span>var_remap_</h2>
+
+<div class="memitem">
+<div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
+      <table class="memname">
+        <tr>
+          <td class="memname">std::unordered_map&lt;const <a class="el" href="classtvm_1_1tir_1_1VarNode.html">VarNode</a>*, <a class="el" href="classtvm_1_1tir_1_1Var.html">Var</a>&gt; tvm::tir::DataTypeLegalizer::var_remap_</td>
+        </tr>
+      </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">protected</span></span>  </td>
+  </tr>
+</table>
+</div><div class="memdoc">
+
 </div>
 </div>
 <hr/>The documentation for this class was generated from the following file:<ul>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer__coll__graph.svg b/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer__coll__graph.svg
index 4f789dfd9b..2515d08d2f 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer__coll__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer__coll__graph.svg
@@ -4,109 +4,111 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: tvm::tir::DataTypeLegalizer Pages: 1 -->
-<svg width="328pt" height="590pt"
- viewBox="0.00 0.00 328.00 590.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 586)">
+<svg width="328pt" height="601pt"
+ viewBox="0.00 0.00 328.00 601.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 597)">
 <title>tvm::tir::DataTypeLegalizer</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-586 324,-586 324,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-597 324,-597 324,4 -4,4"/>
 <!-- Node8 -->
 <g id="node1" class="node">
 <title>Node8</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="74.5,-.5 74.5,-167.5 223.5,-167.5 223.5,-.5 74.5,-.5"/>
-<text text-anchor="middle" x="149" y="-155.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
-<polyline fill="none" stroke="#000000" points="74.5,-148.5 223.5,-148.5 "/>
-<text text-anchor="start" x="82.5" y="-136.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="74.5,-.5 74.5,-178.5 223.5,-178.5 223.5,-.5 74.5,-.5"/>
+<text text-anchor="middle" x="149" y="-166.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
+<polyline fill="none" stroke="#000000" points="74.5,-159.5 223.5,-159.5 "/>
+<text text-anchor="start" x="82.5" y="-147.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
+<text text-anchor="start" x="82.5" y="-136.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
 <polyline fill="none" stroke="#000000" points="74.5,-129.5 223.5,-129.5 "/>
 <text text-anchor="start" x="82.5" y="-117.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-106.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-95.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-84.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="82.5" y="-73.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="82.5" y="-73.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-62.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="82.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
+<text text-anchor="start" x="82.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
 </g>
 <!-- Node9 -->
 <g id="node2" class="node">
 <title>Node9</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1StmtExprMutator.html" target="_top" xlink:title="Mutator that recursively mutates stmts and exprs on them. ">
-<polygon fill="#ffffff" stroke="#000000" points="78.5,-204.5 78.5,-261.5 219.5,-261.5 219.5,-204.5 78.5,-204.5"/>
-<text text-anchor="middle" x="149" y="-249.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtExprMutator</text>
-<polyline fill="none" stroke="#000000" points="78.5,-242.5 219.5,-242.5 "/>
-<text text-anchor="middle" x="149" y="-230.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="78.5,-223.5 219.5,-223.5 "/>
-<text text-anchor="start" x="86.5" y="-211.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr()</text>
+<polygon fill="#ffffff" stroke="#000000" points="78.5,-215.5 78.5,-272.5 219.5,-272.5 219.5,-215.5 78.5,-215.5"/>
+<text text-anchor="middle" x="149" y="-260.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtExprMutator</text>
+<polyline fill="none" stroke="#000000" points="78.5,-253.5 219.5,-253.5 "/>
+<text text-anchor="middle" x="149" y="-241.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="78.5,-234.5 219.5,-234.5 "/>
+<text text-anchor="start" x="86.5" y="-222.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr()</text>
 </a>
 </g>
 </g>
 <!-- Node9&#45;&gt;Node8 -->
 <g id="edge1" class="edge">
 <title>Node9&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M149,-194.2025C149,-185.8906 149,-176.8023 149,-167.5245"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-194.2713 149,-204.2713 152.5001,-194.2713 145.5001,-194.2713"/>
+<path fill="none" stroke="#191970" d="M149,-205.039C149,-196.8624 149,-187.9198 149,-178.7458"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-205.3018 149,-215.3018 152.5001,-205.3019 145.5001,-205.3018"/>
 </g>
 <!-- Node10 -->
 <g id="node3" class="node">
 <title>Node10</title>
 <g id="a_node3"><a xlink:href="classtvm_1_1tir_1_1StmtMutator.html" target="_top" xlink:title="StmtMutator that mutates the statements. ">
-<polygon fill="#ffffff" stroke="#000000" points="9.5,-298.5 9.5,-476.5 144.5,-476.5 144.5,-298.5 9.5,-298.5"/>
-<text text-anchor="middle" x="77" y="-464.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtMutator</text>
-<polyline fill="none" stroke="#000000" points="9.5,-457.5 144.5,-457.5 "/>
-<text text-anchor="start" x="17.5" y="-445.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># allow_copy_on_write_</text>
-<polyline fill="none" stroke="#000000" points="9.5,-438.5 144.5,-438.5 "/>
-<text text-anchor="start" x="17.5" y="-426.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator()()</text>
-<text text-anchor="start" x="17.5" y="-415.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># CopyOnWrite()</text>
-<text text-anchor="start" x="17.5" y="-404.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt()</text>
-<text text-anchor="start" x="17.5" y="-393.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr()</text>
+<polygon fill="#ffffff" stroke="#000000" points="9.5,-309.5 9.5,-487.5 144.5,-487.5 144.5,-309.5 9.5,-309.5"/>
+<text text-anchor="middle" x="77" y="-475.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtMutator</text>
+<polyline fill="none" stroke="#000000" points="9.5,-468.5 144.5,-468.5 "/>
+<text text-anchor="start" x="17.5" y="-456.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># allow_copy_on_write_</text>
+<polyline fill="none" stroke="#000000" points="9.5,-449.5 144.5,-449.5 "/>
+<text text-anchor="start" x="17.5" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator()()</text>
+<text text-anchor="start" x="17.5" y="-426.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># CopyOnWrite()</text>
+<text text-anchor="start" x="17.5" y="-415.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt()</text>
+<text text-anchor="start" x="17.5" y="-404.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr()</text>
+<text text-anchor="start" x="17.5" y="-393.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="17.5" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="17.5" y="-371.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="17.5" y="-360.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="17.5" y="-349.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="17.5" y="-338.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="17.5" y="-327.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="17.5" y="-316.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="17.5" y="-305.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
+<text text-anchor="start" x="17.5" y="-316.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
 </a>
 </g>
 </g>
 <!-- Node10&#45;&gt;Node9 -->
 <g id="edge2" class="edge">
 <title>Node10&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M122.914,-288.9763C127.4704,-279.1989 131.797,-269.9147 135.5695,-261.8196"/>
-<polygon fill="none" stroke="#191970" points="119.7259,-287.5316 118.6742,-298.0741 126.0707,-290.4885 119.7259,-287.5316"/>
+<path fill="none" stroke="#191970" d="M122.914,-299.9763C127.4704,-290.1989 131.797,-280.9147 135.5695,-272.8196"/>
+<polygon fill="none" stroke="#191970" points="119.7259,-298.5316 118.6742,-309.0741 126.0707,-301.4885 119.7259,-298.5316"/>
 </g>
 <!-- Node11 -->
 <g id="node4" class="node">
 <title>Node11</title>
 <g id="a_node4"><a xlink:href="classtvm_1_1tir_1_1StmtFunctor.html" target="_top" xlink:title="{tvm::tir::StmtFunctor\l\&lt; Stmt(const Stmt &amp;)\&gt;\n||}">
-<polygon fill="#ffffff" stroke="#000000" points="0,-513.5 0,-581.5 130,-581.5 130,-513.5 0,-513.5"/>
-<text text-anchor="start" x="8" y="-569.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtFunctor</text>
-<text text-anchor="middle" x="65" y="-558.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; Stmt(const Stmt &amp;)&gt;</text>
-<polyline fill="none" stroke="#000000" points="0,-551.5 130,-551.5 "/>
-<text text-anchor="middle" x="65" y="-539.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="0,-532.5 130,-532.5 "/>
-<text text-anchor="middle" x="65" y="-520.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polygon fill="#ffffff" stroke="#000000" points="0,-524.5 0,-592.5 130,-592.5 130,-524.5 0,-524.5"/>
+<text text-anchor="start" x="8" y="-580.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtFunctor</text>
+<text text-anchor="middle" x="65" y="-569.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; Stmt(const Stmt &amp;)&gt;</text>
+<polyline fill="none" stroke="#000000" points="0,-562.5 130,-562.5 "/>
+<text text-anchor="middle" x="65" y="-550.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="0,-543.5 130,-543.5 "/>
+<text text-anchor="middle" x="65" y="-531.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
 </a>
 </g>
 </g>
 <!-- Node11&#45;&gt;Node10 -->
 <g id="edge3" class="edge">
 <title>Node11&#45;&gt;Node10</title>
-<path fill="none" stroke="#006400" d="M68.3354,-503.0284C68.9587,-494.7175 69.6302,-485.7646 70.3137,-476.6512"/>
-<polygon fill="none" stroke="#006400" points="64.8321,-502.9422 67.5743,-513.176 71.8125,-503.4658 64.8321,-502.9422"/>
+<path fill="none" stroke="#006400" d="M68.3354,-514.0284C68.9587,-505.7175 69.6302,-496.7646 70.3137,-487.6512"/>
+<polygon fill="none" stroke="#006400" points="64.8321,-513.9422 67.5743,-524.176 71.8125,-514.4658 64.8321,-513.9422"/>
 </g>
 <!-- Node12 -->
 <g id="node5" class="node">
 <title>Node12</title>
 <g id="a_node5"><a xlink:href="classtvm_1_1tir_1_1ExprMutator.html" target="_top" xlink:title="ExprMutator that mutates expressions. ">
-<polygon fill="#ffffff" stroke="#000000" points="169,-304 169,-471 287,-471 287,-304 169,-304"/>
-<text text-anchor="middle" x="228" y="-459" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprMutator</text>
-<polyline fill="none" stroke="#000000" points="169,-452 287,-452 "/>
-<text text-anchor="middle" x="228" y="-440" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="169,-433 287,-433 "/>
+<polygon fill="#ffffff" stroke="#000000" points="169,-315 169,-482 287,-482 287,-315 169,-315"/>
+<text text-anchor="middle" x="228" y="-470" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprMutator</text>
+<polyline fill="none" stroke="#000000" points="169,-463 287,-463 "/>
+<text text-anchor="middle" x="228" y="-451" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="169,-444 287,-444 "/>
+<text text-anchor="start" x="177" y="-432" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="177" y="-421" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="177" y="-410" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="177" y="-399" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
@@ -116,36 +118,35 @@
 <text text-anchor="start" x="177" y="-355" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="177" y="-344" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="177" y="-333" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="177" y="-322" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="177" y="-311" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 25 more...</text>
+<text text-anchor="start" x="177" y="-322" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 25 more...</text>
 </a>
 </g>
 </g>
 <!-- Node12&#45;&gt;Node9 -->
 <g id="edge4" class="edge">
 <title>Node12&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M180.5531,-294.7082C174.4479,-282.7684 168.607,-271.3454 163.6467,-261.6446"/>
-<polygon fill="none" stroke="#191970" points="177.5941,-296.6093 185.263,-303.9194 183.8266,-293.4224 177.5941,-296.6093"/>
+<path fill="none" stroke="#191970" d="M180.5531,-305.7082C174.4479,-293.7684 168.607,-282.3454 163.6467,-272.6446"/>
+<polygon fill="none" stroke="#191970" points="177.5941,-307.6093 185.263,-314.9194 183.8266,-304.4224 177.5941,-307.6093"/>
 </g>
 <!-- Node13 -->
 <g id="node6" class="node">
 <title>Node13</title>
 <g id="a_node6"><a xlink:href="classtvm_1_1tir_1_1ExprFunctor.html" target="_top" xlink:title="{tvm::tir::ExprFunctor\l\&lt; PrimExpr(const PrimExpr &amp;)\&gt;\n||}">
-<polygon fill="#ffffff" stroke="#000000" points="148,-513.5 148,-581.5 320,-581.5 320,-513.5 148,-513.5"/>
-<text text-anchor="start" x="156" y="-569.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprFunctor</text>
-<text text-anchor="middle" x="234" y="-558.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; PrimExpr(const PrimExpr &amp;)&gt;</text>
-<polyline fill="none" stroke="#000000" points="148,-551.5 320,-551.5 "/>
-<text text-anchor="middle" x="234" y="-539.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="148,-532.5 320,-532.5 "/>
-<text text-anchor="middle" x="234" y="-520.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polygon fill="#ffffff" stroke="#000000" points="148,-524.5 148,-592.5 320,-592.5 320,-524.5 148,-524.5"/>
+<text text-anchor="start" x="156" y="-580.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::ExprFunctor</text>
+<text text-anchor="middle" x="234" y="-569.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; PrimExpr(const PrimExpr &amp;)&gt;</text>
+<polyline fill="none" stroke="#000000" points="148,-562.5 320,-562.5 "/>
+<text text-anchor="middle" x="234" y="-550.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="148,-543.5 320,-543.5 "/>
+<text text-anchor="middle" x="234" y="-531.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
 </a>
 </g>
 </g>
 <!-- Node13&#45;&gt;Node12 -->
 <g id="edge5" class="edge">
 <title>Node13&#45;&gt;Node12</title>
-<path fill="none" stroke="#006400" d="M232.3353,-503.1075C231.9595,-493.0866 231.5486,-482.1291 231.1343,-471.0805"/>
-<polygon fill="none" stroke="#006400" points="228.8405,-503.3142 232.7128,-513.176 235.8356,-503.0518 228.8405,-503.3142"/>
+<path fill="none" stroke="#006400" d="M232.3353,-514.1075C231.9595,-504.0866 231.5486,-493.1291 231.1343,-482.0805"/>
+<polygon fill="none" stroke="#006400" points="228.8405,-514.3142 232.7128,-524.176 235.8356,-514.0518 228.8405,-514.3142"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer__inherit__graph.svg b/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer__inherit__graph.svg
index ca5c7a4799..f5eb3517a9 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer__inherit__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1DataTypeLegalizer__inherit__graph.svg
@@ -12,33 +12,33 @@
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="74.5,-375.5 74.5,-542.5 223.5,-542.5 223.5,-375.5 74.5,-375.5"/>
+<polygon fill="#bfbfbf" stroke="#000000" points="74.5,-364.5 74.5,-542.5 223.5,-542.5 223.5,-364.5 74.5,-364.5"/>
 <text text-anchor="middle" x="149" y="-530.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
 <polyline fill="none" stroke="#000000" points="74.5,-523.5 223.5,-523.5 "/>
 <text text-anchor="start" x="82.5" y="-511.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
-<polyline fill="none" stroke="#000000" points="74.5,-504.5 223.5,-504.5 "/>
-<text text-anchor="start" x="82.5" y="-492.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="82.5" y="-500.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polyline fill="none" stroke="#000000" points="74.5,-493.5 223.5,-493.5 "/>
 <text text-anchor="start" x="82.5" y="-481.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-470.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-459.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="82.5" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="82.5" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="82.5" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="82.5" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-426.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-415.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-404.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-393.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="82.5" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
+<text text-anchor="start" x="82.5" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="82.5" y="-371.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
 </g>
 <!-- Node6 -->
 <g id="node7" class="node">
 <title>Node6</title>
 <g id="a_node7"><a xlink:href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html" target="_top" xlink:title="Data type rewriter for buffer indices. ">
-<polygon fill="#ffffff" stroke="#000000" points="63,-138.5 63,-338.5 235,-338.5 235,-138.5 63,-138.5"/>
-<text text-anchor="middle" x="149" y="-326.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
-<polyline fill="none" stroke="#000000" points="63,-319.5 235,-319.5 "/>
-<text text-anchor="start" x="71" y="-307.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
-<text text-anchor="start" x="71" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
-<text text-anchor="start" x="71" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polygon fill="#ffffff" stroke="#000000" points="63,-138.5 63,-327.5 235,-327.5 235,-138.5 63,-138.5"/>
+<text text-anchor="middle" x="149" y="-315.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
+<polyline fill="none" stroke="#000000" points="63,-308.5 235,-308.5 "/>
+<text text-anchor="start" x="71" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
+<text text-anchor="start" x="71" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
 <text text-anchor="start" x="71" y="-274.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># buffer_remap_</text>
 <polyline fill="none" stroke="#000000" points="63,-267.5 235,-267.5 "/>
 <text text-anchor="start" x="71" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
@@ -58,8 +58,8 @@
 <!-- Node0&#45;&gt;Node6 -->
 <g id="edge6" class="edge">
 <title>Node0&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M149,-365.1586C149,-356.4328 149,-347.5668 149,-338.7858"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-365.324 149,-375.324 152.5001,-365.324 145.5001,-365.324"/>
+<path fill="none" stroke="#191970" d="M149,-354.1939C149,-345.4093 149,-336.531 149,-327.7853"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-354.4498 149,-364.4498 152.5001,-354.4498 145.5001,-354.4498"/>
 </g>
 <!-- Node1 -->
 <g id="node2" class="node">
@@ -77,8 +77,8 @@
 <!-- Node1&#45;&gt;Node0 -->
 <g id="edge1" class="edge">
 <title>Node1&#45;&gt;Node0</title>
-<path fill="none" stroke="#191970" d="M149,-569.2025C149,-560.8906 149,-551.8023 149,-542.5245"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-569.2713 149,-579.2713 152.5001,-569.2713 145.5001,-569.2713"/>
+<path fill="none" stroke="#191970" d="M149,-569.039C149,-560.8624 149,-551.9198 149,-542.7458"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-569.3018 149,-579.3018 152.5001,-569.3019 145.5001,-569.3018"/>
 </g>
 <!-- Node2 -->
 <g id="node3" class="node">
@@ -199,8 +199,8 @@
 <!-- Node6&#45;&gt;Node7 -->
 <g id="edge7" class="edge">
 <title>Node6&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M149,-128.3353C149,-119.0891 149,-110.0626 149,-101.6311"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-128.4371 149,-138.4371 152.5001,-128.4371 145.5001,-128.4371"/>
+<path fill="none" stroke="#191970" d="M149,-128.258C149,-119.0849 149,-110.1025 149,-101.6953"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-128.2685 149,-138.2685 152.5001,-128.2685 145.5001,-128.2685"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1ExprMutator__inherit__graph.svg b/docs/reference/api/doxygen/classtvm_1_1tir_1_1ExprMutator__inherit__graph.svg
index e2be5a0b6e..5b6e073087 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1ExprMutator__inherit__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1ExprMutator__inherit__graph.svg
@@ -72,41 +72,41 @@
 <g id="node4" class="node">
 <title>Node3</title>
 <g id="a_node4"><a xlink:href="classtvm_1_1tir_1_1DataTypeLegalizer.html" target="_top" xlink:title="Legalize the data types of expressions to make sure they are consistent with other parts of the progr...">
-<polygon fill="#ffffff" stroke="#000000" points="18,-375.5 18,-542.5 167,-542.5 167,-375.5 18,-375.5"/>
+<polygon fill="#ffffff" stroke="#000000" points="18,-364.5 18,-542.5 167,-542.5 167,-364.5 18,-364.5"/>
 <text text-anchor="middle" x="92.5" y="-530.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
 <polyline fill="none" stroke="#000000" points="18,-523.5 167,-523.5 "/>
 <text text-anchor="start" x="26" y="-511.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
-<polyline fill="none" stroke="#000000" points="18,-504.5 167,-504.5 "/>
-<text text-anchor="start" x="26" y="-492.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="26" y="-500.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polyline fill="none" stroke="#000000" points="18,-493.5 167,-493.5 "/>
 <text text-anchor="start" x="26" y="-481.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="26" y="-470.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="26" y="-459.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="26" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="26" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="26" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="26" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="26" y="-426.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="26" y="-415.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="26" y="-404.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="26" y="-393.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="26" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
+<text text-anchor="start" x="26" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="26" y="-371.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node3 -->
 <g id="edge3" class="edge">
 <title>Node2&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M92.5,-569.2025C92.5,-560.8906 92.5,-551.8023 92.5,-542.5245"/>
-<polygon fill="none" stroke="#191970" points="89.0001,-569.2713 92.5,-579.2713 96.0001,-569.2713 89.0001,-569.2713"/>
+<path fill="none" stroke="#191970" d="M92.5,-569.039C92.5,-560.8624 92.5,-551.9198 92.5,-542.7458"/>
+<polygon fill="none" stroke="#191970" points="89.0001,-569.3018 92.5,-579.3018 96.0001,-569.3019 89.0001,-569.3018"/>
 </g>
 <!-- Node4 -->
 <g id="node5" class="node">
 <title>Node4</title>
 <g id="a_node5"><a xlink:href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html" target="_top" xlink:title="Data type rewriter for buffer indices. ">
-<polygon fill="#ffffff" stroke="#000000" points="6.5,-138.5 6.5,-338.5 178.5,-338.5 178.5,-138.5 6.5,-138.5"/>
-<text text-anchor="middle" x="92.5" y="-326.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
-<polyline fill="none" stroke="#000000" points="6.5,-319.5 178.5,-319.5 "/>
-<text text-anchor="start" x="14.5" y="-307.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
-<text text-anchor="start" x="14.5" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
-<text text-anchor="start" x="14.5" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polygon fill="#ffffff" stroke="#000000" points="6.5,-138.5 6.5,-327.5 178.5,-327.5 178.5,-138.5 6.5,-138.5"/>
+<text text-anchor="middle" x="92.5" y="-315.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
+<polyline fill="none" stroke="#000000" points="6.5,-308.5 178.5,-308.5 "/>
+<text text-anchor="start" x="14.5" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
+<text text-anchor="start" x="14.5" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
 <text text-anchor="start" x="14.5" y="-274.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># buffer_remap_</text>
 <polyline fill="none" stroke="#000000" points="6.5,-267.5 178.5,-267.5 "/>
 <text text-anchor="start" x="14.5" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
@@ -126,8 +126,8 @@
 <!-- Node3&#45;&gt;Node4 -->
 <g id="edge4" class="edge">
 <title>Node3&#45;&gt;Node4</title>
-<path fill="none" stroke="#191970" d="M92.5,-365.1586C92.5,-356.4328 92.5,-347.5668 92.5,-338.7858"/>
-<polygon fill="none" stroke="#191970" points="89.0001,-365.324 92.5,-375.324 96.0001,-365.324 89.0001,-365.324"/>
+<path fill="none" stroke="#191970" d="M92.5,-354.1939C92.5,-345.4093 92.5,-336.531 92.5,-327.7853"/>
+<polygon fill="none" stroke="#191970" points="89.0001,-354.4498 92.5,-364.4498 96.0001,-354.4498 89.0001,-354.4498"/>
 </g>
 <!-- Node5 -->
 <g id="node6" class="node">
@@ -149,8 +149,8 @@
 <!-- Node4&#45;&gt;Node5 -->
 <g id="edge5" class="edge">
 <title>Node4&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M92.5,-128.3353C92.5,-119.0891 92.5,-110.0626 92.5,-101.6311"/>
-<polygon fill="none" stroke="#191970" points="89.0001,-128.4371 92.5,-138.4371 96.0001,-128.4371 89.0001,-128.4371"/>
+<path fill="none" stroke="#191970" d="M92.5,-128.258C92.5,-119.0849 92.5,-110.1025 92.5,-101.6953"/>
+<polygon fill="none" stroke="#191970" points="89.0001,-128.2685 92.5,-138.2685 96.0001,-128.2685 89.0001,-128.2685"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer-members.html b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer-members.html
index 4cee27ce31..49a04f6b4d 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer-members.html
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer-members.html
@@ -81,7 +81,7 @@ $(function() {
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html#a8d04a64a80c9fbdd28c7cb9e00c02120">Parent</a> typedef</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html">tvm::tir::IndexDataTypeNormalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html#afc2a6d4cb922111b859d75e3ba43c150">Rewrite</a>(PrimFunc func)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html">tvm::tir::IndexDataTypeNormalizer</a></td><td class="entry"></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html#ac3433736903957d9062b6ebd62cc3a81">target_data_type_</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html">tvm::tir::IndexDataTypeNormalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a34ba7e2f7b8a676f84a8fcb37d3c5dba">var_remap_</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1de0d50699d7d15618b19d2cb7fadb35">var_remap_</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#ac3cbdd10c2660208ba65dab805968c79">VisitBlockAnnotations</a>(const Map&lt; String, ObjectRef &gt; &amp;annotations)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a020018fd34983116e16548986da455f9">VisitBuffer</a>(const Buffer &amp;buffer)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#aa44e8eb51377a5329c30cd13410fb4df">VisitBufferRegion</a>(const BufferRegion &amp;region)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
@@ -133,7 +133,7 @@ $(function() {
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#aa6411333a5796b7056a674cc03173d71">VisitStmt_</a>(const AllocateNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a0195f5def23df2aafef01255243390f3">VisitStmt_</a>(const ForNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#aca20c14f1f058ffb3ef6c8bba1fda6f1">tvm::tir::DataTypeLegalizer::VisitStmt_</a>(const AttrStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a51d66e88ec3779c117bab5ba9406091c">tvm::tir::StmtExprMutator::VisitStmt_</a>(const LetStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a87e951cbdf97e52218f21db3fbbfaf38">tvm::tir::DataTypeLegalizer::VisitStmt_</a>(const LetStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a971adb4c88808a67317da73954e093b5">tvm::tir::StmtExprMutator::VisitStmt_</a>(const WhileNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#ade45b66284e1bc514afeb45bfd645b1f">tvm::tir::StmtExprMutator::VisitStmt_</a>(const AllocateConstNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a3b116212aaf79bc898f3446a35f7fd3e">tvm::tir::StmtExprMutator::VisitStmt_</a>(const StoreNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer.html b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer.html
index 5432b91082..9c0793158c 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer.html
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer.html
@@ -86,7 +86,7 @@ Inheritance diagram for tvm::tir::IndexDataTypeNormalizer:</div>
 <div class="dynheader">
 Collaboration diagram for tvm::tir::IndexDataTypeNormalizer:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1IndexDataTypeNormalizer__coll__graph.svg" width="763" height="1656"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1IndexDataTypeNormalizer__coll__graph.svg" width="724" height="1656"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <table class="memberdecls">
@@ -169,6 +169,10 @@ Protected Member Functions</h2></td></tr>
 <tr class="separator:acab424302ee742c9759da1696519496a inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a74a17d6246a62100e699dec154ceef72 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a74a17d6246a62100e699dec154ceef72">VisitStmt_</a> (const <a class="el" href="classtvm_1_1tir_1_1BlockNode.html">BlockNode</a> *op) override</td></tr>
 <tr class="separator:a74a17d6246a62100e699dec154ceef72 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a87e951cbdf97e52218f21db3fbbfaf38 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a87e951cbdf97e52218f21db3fbbfaf38">VisitStmt_</a> (const <a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html">LetStmtNode</a> *op) override</td></tr>
+<tr class="separator:a87e951cbdf97e52218f21db3fbbfaf38 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a947d190184b5f3d1e406216909a2cf18 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a947d190184b5f3d1e406216909a2cf18">VisitExpr_</a> (const <a class="el" href="classtvm_1_1tir_1_1VarNode.html">VarNode</a> *op) override</td></tr>
+<tr class="separator:a947d190184b5f3d1e406216909a2cf18 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a3c9ea744377f09c9656cc3718eed7bfb inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a3c9ea744377f09c9656cc3718eed7bfb">VisitExpr_</a> (const <a class="el" href="classtvm_1_1tir_1_1SelectNode.html">SelectNode</a> *op) override</td></tr>
 <tr class="separator:a3c9ea744377f09c9656cc3718eed7bfb inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:abba02eaed632f7b46d078e087f0b4217 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#abba02eaed632f7b46d078e087f0b4217">VisitExpr_</a> (const <a class="el" href="classtvm_1_1tir_1_1RampNode.html">RampNode</a> *op) override</td></tr>
@@ -341,13 +345,13 @@ Protected Attributes</h2></td></tr>
 <tr class="separator:a56fc45d85bdc5ed5c0c22ea938cb8c20 inherit pro_attribs_classtvm_1_1tir_1_1IndexDataTypeRewriter"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a734cbb5c4096dc0970a9e29bff1445ea inherit pro_attribs_classtvm_1_1tir_1_1IndexDataTypeRewriter"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a734cbb5c4096dc0970a9e29bff1445ea">is_condition_</a> {false}</td></tr>
 <tr class="separator:a734cbb5c4096dc0970a9e29bff1445ea inherit pro_attribs_classtvm_1_1tir_1_1IndexDataTypeRewriter"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a34ba7e2f7b8a676f84a8fcb37d3c5dba inherit pro_attribs_classtvm_1_1tir_1_1IndexDataTypeRewriter"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1runtime_1_1Map.html">Map</a>&lt; <a class="el" href="classtvm_1_1tir_1_1Var.html">Var</a>, <a class="el" href="classtvm_1_1tir_1_1Var.html">Var</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a34ba7e2f7b8a676f84a8fcb [...]
-<tr class="separator:a34ba7e2f7b8a676f84a8fcb37d3c5dba inherit pro_attribs_classtvm_1_1tir_1_1IndexDataTypeRewriter"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a8ee3de0fbd707b0062c32c0511d53904 inherit pro_attribs_classtvm_1_1tir_1_1IndexDataTypeRewriter"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1runtime_1_1Map.html">Map</a>&lt; <a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a>, <a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a8ee3de0fbd7 [...]
 <tr class="separator:a8ee3de0fbd707b0062c32c0511d53904 inherit pro_attribs_classtvm_1_1tir_1_1IndexDataTypeRewriter"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="inherit_header pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer"><td colspan="2" onclick="javascript:toggleInherit('pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer')"><img src="closed.png" alt="-"/>&#160;Protected Attributes inherited from <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td></tr>
 <tr class="memitem:a4b60203572648ecb12a2aa72a552318d inherit pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top">std::unordered_map&lt; const <a class="el" href="classtvm_1_1tir_1_1IterVarNode.html">IterVarNode</a> *, <a class="el" href="classtvm_1_1tir_1_1IterVar.html">IterVar</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a4b60203572648ecb12a2aa72a552318d">ivmap_< [...]
 <tr class="separator:a4b60203572648ecb12a2aa72a552318d inherit pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a1de0d50699d7d15618b19d2cb7fadb35 inherit pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top">std::unordered_map&lt; const <a class="el" href="classtvm_1_1tir_1_1VarNode.html">VarNode</a> *, <a class="el" href="classtvm_1_1tir_1_1Var.html">Var</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1de0d50699d7d15618b19d2cb7fadb35">var_remap_</a></td></tr>
+<tr class="separator:a1de0d50699d7d15618b19d2cb7fadb35 inherit pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="inherit_header pro_attribs_classtvm_1_1tir_1_1StmtMutator"><td colspan="2" onclick="javascript:toggleInherit('pro_attribs_classtvm_1_1tir_1_1StmtMutator')"><img src="closed.png" alt="-"/>&#160;Protected Attributes inherited from <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td></tr>
 <tr class="memitem:a620e6041832441d25ee4f4d65921231f inherit pro_attribs_classtvm_1_1tir_1_1StmtMutator"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a620e6041832441d25ee4f4d65921231f">allow_copy_on_write_</a> {false}</td></tr>
 <tr class="memdesc:a620e6041832441d25ee4f4d65921231f inherit pro_attribs_classtvm_1_1tir_1_1StmtMutator"><td class="mdescLeft">&#160;</td><td class="mdescRight">Internal state to indicate whether copy on write is enabled. COW is enabled iff all the parents of the node are unique.  <a href="classtvm_1_1tir_1_1StmtMutator.html#a620e6041832441d25ee4f4d65921231f">More...</a><br /></td></tr>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer__coll__graph.svg b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer__coll__graph.svg
index ef43d71c61..f4a6acfd63 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer__coll__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer__coll__graph.svg
@@ -4,102 +4,103 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: tvm::tir::IndexDataTypeNormalizer Pages: 1 -->
-<svg width="572pt" height="1242pt"
- viewBox="0.00 0.00 572.00 1242.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="543pt" height="1242pt"
+ viewBox="0.00 0.00 543.00 1242.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1238)">
 <title>tvm::tir::IndexDataTypeNormalizer</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-1238 568,-1238 568,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-1238 539,-1238 539,4 -4,4"/>
 <!-- Node8 -->
 <g id="node1" class="node">
 <title>Node8</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="320.5,-.5 320.5,-101.5 505.5,-101.5 505.5,-.5 320.5,-.5"/>
-<text text-anchor="middle" x="413" y="-89.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeNormalizer</text>
-<polyline fill="none" stroke="#000000" points="320.5,-82.5 505.5,-82.5 "/>
-<text text-anchor="middle" x="413" y="-70.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="320.5,-63.5 505.5,-63.5 "/>
-<text text-anchor="start" x="328.5" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ IndexDataTypeNormalizer()</text>
-<text text-anchor="start" x="328.5" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Rewrite()</text>
-<text text-anchor="start" x="328.5" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="328.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="328.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="291.5,-.5 291.5,-101.5 476.5,-101.5 476.5,-.5 291.5,-.5"/>
+<text text-anchor="middle" x="384" y="-89.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeNormalizer</text>
+<polyline fill="none" stroke="#000000" points="291.5,-82.5 476.5,-82.5 "/>
+<text text-anchor="middle" x="384" y="-70.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="291.5,-63.5 476.5,-63.5 "/>
+<text text-anchor="start" x="299.5" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ IndexDataTypeNormalizer()</text>
+<text text-anchor="start" x="299.5" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Rewrite()</text>
+<text text-anchor="start" x="299.5" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="299.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="299.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 </g>
 <!-- Node9 -->
 <g id="node2" class="node">
 <title>Node9</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html" target="_top" xlink:title="Data type rewriter for buffer indices. ">
-<polygon fill="#ffffff" stroke="#000000" points="242,-188 242,-366 414,-366 414,-188 242,-188"/>
-<text text-anchor="middle" x="328" y="-354" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
-<polyline fill="none" stroke="#000000" points="242,-347 414,-347 "/>
-<text text-anchor="start" x="250" y="-335" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
-<text text-anchor="start" x="250" y="-324" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
-<polyline fill="none" stroke="#000000" points="242,-317 414,-317 "/>
-<text text-anchor="start" x="250" y="-305" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-294" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-283" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-272" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="250" y="-261" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitIndices()</text>
-<text text-anchor="start" x="250" y="-250" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-239" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-228" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-217" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="250" y="-206" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="250" y="-195" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 11 more...</text>
+<polygon fill="#ffffff" stroke="#000000" points="213,-188 213,-366 385,-366 385,-188 213,-188"/>
+<text text-anchor="middle" x="299" y="-354" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
+<polyline fill="none" stroke="#000000" points="213,-347 385,-347 "/>
+<text text-anchor="start" x="221" y="-335" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
+<text text-anchor="start" x="221" y="-324" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
+<polyline fill="none" stroke="#000000" points="213,-317 385,-317 "/>
+<text text-anchor="start" x="221" y="-305" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-294" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-283" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-272" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="221" y="-261" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitIndices()</text>
+<text text-anchor="start" x="221" y="-250" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-239" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-228" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-217" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="221" y="-206" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="221" y="-195" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 11 more...</text>
 </a>
 </g>
 </g>
 <!-- Node9&#45;&gt;Node8 -->
 <g id="edge1" class="edge">
 <title>Node9&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M365.2287,-178.0154C375.1526,-151.6296 385.4659,-124.2083 393.9591,-101.6265"/>
-<polygon fill="none" stroke="#191970" points="361.8387,-177.0868 361.5943,-187.6788 368.3906,-179.5511 361.8387,-177.0868"/>
+<path fill="none" stroke="#191970" d="M336.2287,-178.0154C346.1526,-151.6296 356.4659,-124.2083 364.9591,-101.6265"/>
+<polygon fill="none" stroke="#191970" points="332.8387,-177.0868 332.5943,-187.6788 339.3906,-179.5511 332.8387,-177.0868"/>
 </g>
 <!-- Node10 -->
 <g id="node3" class="node">
 <title>Node10</title>
 <g id="a_node3"><a xlink:href="classtvm_1_1tir_1_1DataTypeLegalizer.html" target="_top" xlink:title="Legalize the data types of expressions to make sure they are consistent with other parts of the progr...">
-<polygon fill="#ffffff" stroke="#000000" points="103.5,-463.5 103.5,-630.5 252.5,-630.5 252.5,-463.5 103.5,-463.5"/>
-<text text-anchor="middle" x="178" y="-618.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
-<polyline fill="none" stroke="#000000" points="103.5,-611.5 252.5,-611.5 "/>
-<text text-anchor="start" x="111.5" y="-599.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
-<polyline fill="none" stroke="#000000" points="103.5,-592.5 252.5,-592.5 "/>
-<text text-anchor="start" x="111.5" y="-580.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="111.5" y="-569.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="111.5" y="-558.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="111.5" y="-547.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="111.5" y="-536.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-525.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-514.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-503.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-492.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-481.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-470.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
+<polygon fill="#ffffff" stroke="#000000" points="150.5,-458 150.5,-636 299.5,-636 299.5,-458 150.5,-458"/>
+<text text-anchor="middle" x="225" y="-624" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
+<polyline fill="none" stroke="#000000" points="150.5,-617 299.5,-617 "/>
+<text text-anchor="start" x="158.5" y="-605" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
+<text text-anchor="start" x="158.5" y="-594" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polyline fill="none" stroke="#000000" points="150.5,-587 299.5,-587 "/>
+<text text-anchor="start" x="158.5" y="-575" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="158.5" y="-564" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="158.5" y="-553" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="158.5" y="-542" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="158.5" y="-531" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="158.5" y="-520" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="158.5" y="-509" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="158.5" y="-498" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="158.5" y="-487" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="158.5" y="-476" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="158.5" y="-465" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
 </a>
 </g>
 </g>
 <!-- Node10&#45;&gt;Node9 -->
 <g id="edge2" class="edge">
 <title>Node10&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M229.3073,-454.6469C245.1862,-426.0649 262.6442,-394.6405 278.4049,-366.2712"/>
-<polygon fill="none" stroke="#191970" points="226.2306,-452.978 224.4337,-463.4194 232.3497,-456.3775 226.2306,-452.978"/>
+<path fill="none" stroke="#191970" d="M252.1179,-448.0564C259.4743,-421.2155 267.393,-392.3227 274.5902,-366.0628"/>
+<polygon fill="none" stroke="#191970" points="248.7033,-447.2741 249.4355,-457.8436 255.4543,-449.1244 248.7033,-447.2741"/>
 </g>
 <!-- Node11 -->
 <g id="node4" class="node">
 <title>Node11</title>
 <g id="a_node4"><a xlink:href="classtvm_1_1tir_1_1StmtExprMutator.html" target="_top" xlink:title="Mutator that recursively mutates stmts and exprs on them. ">
-<polygon fill="#ffffff" stroke="#000000" points="107.5,-762 107.5,-819 248.5,-819 248.5,-762 107.5,-762"/>
-<text text-anchor="middle" x="178" y="-807" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtExprMutator</text>
-<polyline fill="none" stroke="#000000" points="107.5,-800 248.5,-800 "/>
-<text text-anchor="middle" x="178" y="-788" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="107.5,-781 248.5,-781 "/>
-<text text-anchor="start" x="115.5" y="-769" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr()</text>
+<polygon fill="#ffffff" stroke="#000000" points="150.5,-762 150.5,-819 291.5,-819 291.5,-762 150.5,-762"/>
+<text text-anchor="middle" x="221" y="-807" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtExprMutator</text>
+<polyline fill="none" stroke="#000000" points="150.5,-800 291.5,-800 "/>
+<text text-anchor="middle" x="221" y="-788" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="150.5,-781 291.5,-781 "/>
+<text text-anchor="start" x="158.5" y="-769" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr()</text>
 </a>
 </g>
 </g>
 <!-- Node11&#45;&gt;Node10 -->
 <g id="edge3" class="edge">
 <title>Node11&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M178,-751.1966C178,-718.7294 178,-671.5084 178,-630.6525"/>
-<polygon fill="none" stroke="#191970" points="174.5001,-751.6111 178,-761.6111 181.5001,-751.6112 174.5001,-751.6111"/>
+<path fill="none" stroke="#191970" d="M221.6452,-751.2215C222.1553,-720.1704 222.8873,-675.6131 223.5374,-636.037"/>
+<polygon fill="none" stroke="#191970" points="218.1394,-751.555 221.4746,-761.6111 225.1384,-751.67 218.1394,-751.555"/>
 </g>
 <!-- Node12 -->
 <g id="node5" class="node">
@@ -128,8 +129,8 @@
 <!-- Node12&#45;&gt;Node11 -->
 <g id="edge4" class="edge">
 <title>Node12&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M113.453,-940.1042C132.2313,-896.5807 152.9081,-848.6569 165.6756,-819.0651"/>
-<polygon fill="none" stroke="#191970" points="110.2262,-938.7483 109.4783,-949.3167 116.6535,-941.5214 110.2262,-938.7483"/>
+<path fill="none" stroke="#191970" d="M130.1833,-940.6503C156.5955,-896.9821 185.7495,-848.7808 203.7227,-819.0651"/>
+<polygon fill="none" stroke="#191970" points="127.1221,-938.9487 124.9415,-949.3167 133.1118,-942.5715 127.1221,-938.9487"/>
 </g>
 <!-- Node13 -->
 <g id="node6" class="node">
@@ -177,8 +178,8 @@
 <!-- Node14&#45;&gt;Node11 -->
 <g id="edge6" class="edge">
 <title>Node14&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M209.1208,-944.8594C200.1178,-900.2044 189.9863,-849.9523 183.7984,-819.2603"/>
-<polygon fill="none" stroke="#191970" points="205.7346,-945.7733 211.142,-954.8843 212.5965,-944.3897 205.7346,-945.7733"/>
+<path fill="none" stroke="#191970" d="M225.3569,-944.8594C224.0965,-900.2044 222.6781,-849.9523 221.8118,-819.2603"/>
+<polygon fill="none" stroke="#191970" points="221.859,-944.987 225.6399,-954.8843 228.8562,-944.7895 221.859,-944.987"/>
 </g>
 <!-- Node15 -->
 <g id="node8" class="node">
@@ -204,174 +205,136 @@
 <g id="node9" class="node">
 <title>Node16</title>
 <g id="a_node9"><a xlink:href="classtvm_1_1runtime_1_1Map.html" target="_top" xlink:title="{tvm::runtime::Map\&lt;\l tvm::tir::Buffer, tvm\l::tir::Buffer \&gt;\n||+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ operator=()\l+ operator=()\l+ at()\land 12 more...\l}">
-<polygon fill="#ffffff" stroke="#000000" points="270,-452.5 270,-641.5 386,-641.5 386,-452.5 270,-452.5"/>
-<text text-anchor="start" x="278" y="-629.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Map&lt;</text>
-<text text-anchor="start" x="278" y="-618.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> tvm::tir::Buffer, tvm</text>
-<text text-anchor="middle" x="328" y="-607.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::tir::Buffer &gt;</text>
-<polyline fill="none" stroke="#000000" points="270,-600.5 386,-600.5 "/>
-<text text-anchor="middle" x="328" y="-588.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="270,-581.5 386,-581.5 "/>
-<text text-anchor="start" x="278" y="-569.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-558.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-547.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-536.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-525.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-514.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-503.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-492.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
-<text text-anchor="start" x="278" y="-481.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
-<text text-anchor="start" x="278" y="-470.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ at()</text>
-<text text-anchor="start" x="278" y="-459.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 12 more...</text>
+<polygon fill="#ffffff" stroke="#000000" points="318,-452.5 318,-641.5 434,-641.5 434,-452.5 318,-452.5"/>
+<text text-anchor="start" x="326" y="-629.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Map&lt;</text>
+<text text-anchor="start" x="326" y="-618.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> tvm::tir::Buffer, tvm</text>
+<text text-anchor="middle" x="376" y="-607.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::tir::Buffer &gt;</text>
+<polyline fill="none" stroke="#000000" points="318,-600.5 434,-600.5 "/>
+<text text-anchor="middle" x="376" y="-588.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="318,-581.5 434,-581.5 "/>
+<text text-anchor="start" x="326" y="-569.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-558.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-547.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-536.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-525.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-514.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-503.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-492.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
+<text text-anchor="start" x="326" y="-481.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
+<text text-anchor="start" x="326" y="-470.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ at()</text>
+<text text-anchor="start" x="326" y="-459.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 12 more...</text>
 </a>
 </g>
 </g>
 <!-- Node16&#45;&gt;Node9 -->
 <g id="edge8" class="edge">
 <title>Node16&#45;&gt;Node9</title>
-<path fill="none" stroke="#404040" d="M328,-452.1878C328,-428.2701 328,-402.5006 328,-378.2786"/>
-<polygon fill="none" stroke="#404040" points="328.0001,-378.1505 324,-372.1506 328,-366.1505 332,-372.1505 328.0001,-378.1505"/>
-<text text-anchor="middle" x="369" y="-426" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #buffer_remap_</text>
+<path fill="none" stroke="#404040" d="M348.961,-452.1878C342.1088,-428.1608 334.7238,-402.2652 327.7885,-377.9468"/>
+<polygon fill="none" stroke="#404040" points="327.7155,-377.6904 322.2233,-373.0175 324.4244,-366.1505 329.9166,-370.8235 327.7155,-377.6904"/>
+<text text-anchor="middle" x="384" y="-426" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #buffer_remap_</text>
 </g>
 <!-- Node17 -->
 <g id="node10" class="node">
 <title>Node17</title>
 <g id="a_node10"><a xlink:href="classtvm_1_1runtime_1_1ObjectRef.html" target="_top" xlink:title="Base class of all object reference. ">
-<polygon fill="#ffffff" stroke="#000000" points="315,-679.5 315,-901.5 449,-901.5 449,-679.5 315,-679.5"/>
-<text text-anchor="middle" x="382" y="-889.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectRef</text>
-<polyline fill="none" stroke="#000000" points="315,-882.5 449,-882.5 "/>
-<text text-anchor="start" x="323" y="-870.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_is_nullable</text>
-<polyline fill="none" stroke="#000000" points="315,-863.5 449,-863.5 "/>
-<text text-anchor="start" x="323" y="-851.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectRef()</text>
-<text text-anchor="start" x="323" y="-840.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectRef()</text>
-<text text-anchor="start" x="323" y="-829.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ same_as()</text>
-<text text-anchor="start" x="323" y="-818.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator==()</text>
-<text text-anchor="start" x="323" y="-807.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator!=()</text>
-<text text-anchor="start" x="323" y="-796.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&lt;()</text>
-<text text-anchor="start" x="323" y="-785.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ defined()</text>
-<text text-anchor="start" x="323" y="-774.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ get()</text>
-<text text-anchor="start" x="323" y="-763.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&#45;&gt;()</text>
-<text text-anchor="start" x="323" y="-752.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ unique()</text>
-<text text-anchor="start" x="323" y="-741.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ use_count()</text>
-<text text-anchor="start" x="323" y="-730.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ as()</text>
-<text text-anchor="start" x="323" y="-719.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># get_mutable()</text>
-<text text-anchor="start" x="323" y="-708.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># DowncastNoCheck()</text>
-<text text-anchor="start" x="323" y="-697.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># FFIClearAfterMove()</text>
-<text text-anchor="start" x="323" y="-686.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># GetDataPtr()</text>
+<polygon fill="#ffffff" stroke="#000000" points="310,-679.5 310,-901.5 444,-901.5 444,-679.5 310,-679.5"/>
+<text text-anchor="middle" x="377" y="-889.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectRef</text>
+<polyline fill="none" stroke="#000000" points="310,-882.5 444,-882.5 "/>
+<text text-anchor="start" x="318" y="-870.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_is_nullable</text>
+<polyline fill="none" stroke="#000000" points="310,-863.5 444,-863.5 "/>
+<text text-anchor="start" x="318" y="-851.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectRef()</text>
+<text text-anchor="start" x="318" y="-840.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectRef()</text>
+<text text-anchor="start" x="318" y="-829.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ same_as()</text>
+<text text-anchor="start" x="318" y="-818.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator==()</text>
+<text text-anchor="start" x="318" y="-807.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator!=()</text>
+<text text-anchor="start" x="318" y="-796.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&lt;()</text>
+<text text-anchor="start" x="318" y="-785.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ defined()</text>
+<text text-anchor="start" x="318" y="-774.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ get()</text>
+<text text-anchor="start" x="318" y="-763.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&#45;&gt;()</text>
+<text text-anchor="start" x="318" y="-752.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ unique()</text>
+<text text-anchor="start" x="318" y="-741.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ use_count()</text>
+<text text-anchor="start" x="318" y="-730.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ as()</text>
+<text text-anchor="start" x="318" y="-719.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># get_mutable()</text>
+<text text-anchor="start" x="318" y="-708.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># DowncastNoCheck()</text>
+<text text-anchor="start" x="318" y="-697.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># FFIClearAfterMove()</text>
+<text text-anchor="start" x="318" y="-686.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># GetDataPtr()</text>
 </a>
 </g>
 </g>
 <!-- Node17&#45;&gt;Node16 -->
 <g id="edge9" class="edge">
 <title>Node17&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M355.1926,-669.6186C353.1004,-660.184 351.0065,-650.7425 348.9614,-641.5206"/>
-<polygon fill="none" stroke="#191970" points="351.7882,-670.4334 357.3703,-679.4384 358.6222,-668.9178 351.7882,-670.4334"/>
-</g>
-<!-- Node19 -->
-<g id="node12" class="node">
-<title>Node19</title>
-<g id="a_node12"><a xlink:href="classtvm_1_1runtime_1_1Map.html" target="_top" xlink:title="{tvm::runtime::Map\&lt;\l tvm::tir::Var, tvm\l::tir::Var \&gt;\n||+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ operator=()\l+ operator=()\l+ at()\land 12 more...\l}">
-<polygon fill="#ffffff" stroke="#000000" points="404.5,-452.5 404.5,-641.5 519.5,-641.5 519.5,-452.5 404.5,-452.5"/>
-<text text-anchor="start" x="412.5" y="-629.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Map&lt;</text>
-<text text-anchor="start" x="412.5" y="-618.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> tvm::tir::Var, tvm</text>
-<text text-anchor="middle" x="462" y="-607.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::tir::Var &gt;</text>
-<polyline fill="none" stroke="#000000" points="404.5,-600.5 519.5,-600.5 "/>
-<text text-anchor="middle" x="462" y="-588.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="404.5,-581.5 519.5,-581.5 "/>
-<text text-anchor="start" x="412.5" y="-569.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-558.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-547.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-536.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-525.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-514.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-503.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-492.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
-<text text-anchor="start" x="412.5" y="-481.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
-<text text-anchor="start" x="412.5" y="-470.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ at()</text>
-<text text-anchor="start" x="412.5" y="-459.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 12 more...</text>
-</a>
-</g>
-</g>
-<!-- Node17&#45;&gt;Node19 -->
-<g id="edge12" class="edge">
-<title>Node17&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M421.6167,-669.9165C424.749,-660.3828 427.8843,-650.8395 430.946,-641.5206"/>
-<polygon fill="none" stroke="#191970" points="418.2846,-668.8456 418.4884,-679.4384 424.9349,-671.0305 418.2846,-668.8456"/>
+<path fill="none" stroke="#191970" d="M376.5023,-669.3207C376.464,-659.9853 376.4256,-650.6454 376.3882,-641.5206"/>
+<polygon fill="none" stroke="#191970" points="373.0028,-669.4529 376.5439,-679.4384 380.0027,-669.4241 373.0028,-669.4529"/>
 </g>
 <!-- Node18 -->
 <g id="node11" class="node">
 <title>Node18</title>
 <g id="a_node11"><a xlink:href="classtvm_1_1runtime_1_1ObjectPtr.html" target="_top" xlink:title="{tvm::runtime::ObjectPtr\l\&lt; tvm::runtime::Object \&gt;\n||+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ~ObjectPtr()\l+ swap()\l+ get()\l+ operator&#45;\&gt;()\land 11 more...\l}">
-<polygon fill="#ffffff" stroke="#000000" points="312,-949.5 312,-1127.5 452,-1127.5 452,-949.5 312,-949.5"/>
-<text text-anchor="start" x="320" y="-1115.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectPtr</text>
-<text text-anchor="middle" x="382" y="-1104.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::runtime::Object &gt;</text>
-<polyline fill="none" stroke="#000000" points="312,-1097.5 452,-1097.5 "/>
-<text text-anchor="middle" x="382" y="-1085.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="312,-1078.5 452,-1078.5 "/>
-<text text-anchor="start" x="320" y="-1066.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-1055.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-1044.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-1033.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-1022.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-1011.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-1000.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ~ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-989.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ swap()</text>
-<text text-anchor="start" x="320" y="-978.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ get()</text>
-<text text-anchor="start" x="320" y="-967.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&#45;&gt;()</text>
-<text text-anchor="start" x="320" y="-956.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 11 more...</text>
+<polygon fill="#ffffff" stroke="#000000" points="307,-949.5 307,-1127.5 447,-1127.5 447,-949.5 307,-949.5"/>
+<text text-anchor="start" x="315" y="-1115.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectPtr</text>
+<text text-anchor="middle" x="377" y="-1104.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::runtime::Object &gt;</text>
+<polyline fill="none" stroke="#000000" points="307,-1097.5 447,-1097.5 "/>
+<text text-anchor="middle" x="377" y="-1085.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="307,-1078.5 447,-1078.5 "/>
+<text text-anchor="start" x="315" y="-1066.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-1055.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-1044.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-1033.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-1022.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-1011.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-1000.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ~ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-989.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ swap()</text>
+<text text-anchor="start" x="315" y="-978.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ get()</text>
+<text text-anchor="start" x="315" y="-967.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&#45;&gt;()</text>
+<text text-anchor="start" x="315" y="-956.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 11 more...</text>
 </a>
 </g>
 </g>
 <!-- Node18&#45;&gt;Node17 -->
 <g id="edge10" class="edge">
 <title>Node18&#45;&gt;Node17</title>
-<path fill="none" stroke="#404040" d="M382,-949.3167C382,-937.8765 382,-926.0062 382,-914.1402"/>
-<polygon fill="none" stroke="#404040" points="382.0001,-913.7944 378,-907.7944 382,-901.7944 386,-907.7943 382.0001,-913.7944"/>
-<text text-anchor="middle" x="401.5" y="-923" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #data_</text>
+<path fill="none" stroke="#404040" d="M377,-949.3167C377,-937.8765 377,-926.0062 377,-914.1402"/>
+<polygon fill="none" stroke="#404040" points="377.0001,-913.7944 373,-907.7944 377,-901.7944 381,-907.7943 377.0001,-913.7944"/>
+<text text-anchor="middle" x="396.5" y="-923" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #data_</text>
 </g>
-<!-- Node19&#45;&gt;Node9 -->
-<g id="edge11" class="edge">
-<title>Node19&#45;&gt;Node9</title>
-<path fill="none" stroke="#404040" d="M427.1524,-452.203C422.9442,-442.2445 418.5262,-432.3501 414,-423 406.6058,-407.7252 398.1771,-391.9208 389.5555,-376.6011"/>
-<polygon fill="none" stroke="#404040" points="389.5051,-376.5125 383.0599,-373.2784 383.5663,-366.085 390.0115,-369.3191 389.5051,-376.5125"/>
-<text text-anchor="middle" x="454" y="-426" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #var_remap_</text>
-</g>
-<!-- Node20 -->
-<g id="node13" class="node">
-<title>Node20</title>
-<g id="a_node13"><a xlink:href="classtvm_1_1runtime_1_1DataType.html" target="_top" xlink:title="Runtime primitive data type. ">
-<polygon fill="#ffffff" stroke="#000000" points="432,-149.5 432,-404.5 564,-404.5 564,-149.5 432,-149.5"/>
-<text text-anchor="middle" x="498" y="-392.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::DataType</text>
-<polyline fill="none" stroke="#000000" points="432,-385.5 564,-385.5 "/>
-<text text-anchor="middle" x="498" y="-373.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="432,-366.5 564,-366.5 "/>
-<text text-anchor="start" x="440" y="-354.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ DataType()</text>
-<text text-anchor="start" x="440" y="-343.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ DataType()</text>
-<text text-anchor="start" x="440" y="-332.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ DataType()</text>
-<text text-anchor="start" x="440" y="-321.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ code()</text>
-<text text-anchor="start" x="440" y="-310.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ bits()</text>
-<text text-anchor="start" x="440" y="-299.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ bytes()</text>
-<text text-anchor="start" x="440" y="-288.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ lanes()</text>
-<text text-anchor="start" x="440" y="-277.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ is_scalar()</text>
-<text text-anchor="start" x="440" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ is_bool()</text>
-<text text-anchor="start" x="440" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ is_float()</text>
-<text text-anchor="start" x="440" y="-244.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
-<text text-anchor="start" x="440" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Int()</text>
-<text text-anchor="start" x="440" y="-222.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ UInt()</text>
-<text text-anchor="start" x="440" y="-211.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Float()</text>
-<text text-anchor="start" x="440" y="-200.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ BFloat()</text>
-<text text-anchor="start" x="440" y="-189.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Bool()</text>
-<text text-anchor="start" x="440" y="-178.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Handle()</text>
-<text text-anchor="start" x="440" y="-167.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Void()</text>
-<text text-anchor="start" x="440" y="-156.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ShapeIndex()</text>
+<!-- Node19 -->
+<g id="node12" class="node">
+<title>Node19</title>
+<g id="a_node12"><a xlink:href="classtvm_1_1runtime_1_1DataType.html" target="_top" xlink:title="Runtime primitive data type. ">
+<polygon fill="#ffffff" stroke="#000000" points="403,-149.5 403,-404.5 535,-404.5 535,-149.5 403,-149.5"/>
+<text text-anchor="middle" x="469" y="-392.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::DataType</text>
+<polyline fill="none" stroke="#000000" points="403,-385.5 535,-385.5 "/>
+<text text-anchor="middle" x="469" y="-373.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="403,-366.5 535,-366.5 "/>
+<text text-anchor="start" x="411" y="-354.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ DataType()</text>
+<text text-anchor="start" x="411" y="-343.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ DataType()</text>
+<text text-anchor="start" x="411" y="-332.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ DataType()</text>
+<text text-anchor="start" x="411" y="-321.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ code()</text>
+<text text-anchor="start" x="411" y="-310.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ bits()</text>
+<text text-anchor="start" x="411" y="-299.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ bytes()</text>
+<text text-anchor="start" x="411" y="-288.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ lanes()</text>
+<text text-anchor="start" x="411" y="-277.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ is_scalar()</text>
+<text text-anchor="start" x="411" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ is_bool()</text>
+<text text-anchor="start" x="411" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ is_float()</text>
+<text text-anchor="start" x="411" y="-244.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
+<text text-anchor="start" x="411" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Int()</text>
+<text text-anchor="start" x="411" y="-222.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ UInt()</text>
+<text text-anchor="start" x="411" y="-211.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Float()</text>
+<text text-anchor="start" x="411" y="-200.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ BFloat()</text>
+<text text-anchor="start" x="411" y="-189.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Bool()</text>
+<text text-anchor="start" x="411" y="-178.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Handle()</text>
+<text text-anchor="start" x="411" y="-167.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Void()</text>
+<text text-anchor="start" x="411" y="-156.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ShapeIndex()</text>
 </a>
 </g>
 </g>
-<!-- Node20&#45;&gt;Node8 -->
-<g id="edge13" class="edge">
-<title>Node20&#45;&gt;Node8</title>
-<path fill="none" stroke="#404040" d="M450.035,-149.4695C445.3117,-136.911 440.6799,-124.5961 436.3784,-113.159"/>
-<polygon fill="none" stroke="#404040" points="436.2497,-112.8166 430.3935,-108.6088 432.0252,-101.5847 437.8814,-105.7925 436.2497,-112.8166"/>
-<text text-anchor="middle" x="493" y="-123" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #target_data_type_</text>
+<!-- Node19&#45;&gt;Node8 -->
+<g id="edge11" class="edge">
+<title>Node19&#45;&gt;Node8</title>
+<path fill="none" stroke="#404040" d="M421.035,-149.4695C416.3117,-136.911 411.6799,-124.5961 407.3784,-113.159"/>
+<polygon fill="none" stroke="#404040" points="407.2497,-112.8166 401.3935,-108.6088 403.0252,-101.5847 408.8814,-105.7925 407.2497,-112.8166"/>
+<text text-anchor="middle" x="464" y="-123" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #target_data_type_</text>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer__inherit__graph.svg b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer__inherit__graph.svg
index 67c8b9071c..4ea33bcd01 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer__inherit__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeNormalizer__inherit__graph.svg
@@ -27,12 +27,11 @@
 <g id="node2" class="node">
 <title>Node1</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html" target="_top" xlink:title="Data type rewriter for buffer indices. ">
-<polygon fill="#ffffff" stroke="#000000" points="63,-138.5 63,-338.5 235,-338.5 235,-138.5 63,-138.5"/>
-<text text-anchor="middle" x="149" y="-326.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
-<polyline fill="none" stroke="#000000" points="63,-319.5 235,-319.5 "/>
-<text text-anchor="start" x="71" y="-307.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
-<text text-anchor="start" x="71" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
-<text text-anchor="start" x="71" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polygon fill="#ffffff" stroke="#000000" points="63,-138.5 63,-327.5 235,-327.5 235,-138.5 63,-138.5"/>
+<text text-anchor="middle" x="149" y="-315.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
+<polyline fill="none" stroke="#000000" points="63,-308.5 235,-308.5 "/>
+<text text-anchor="start" x="71" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
+<text text-anchor="start" x="71" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
 <text text-anchor="start" x="71" y="-274.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># buffer_remap_</text>
 <polyline fill="none" stroke="#000000" points="63,-267.5 235,-267.5 "/>
 <text text-anchor="start" x="71" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
@@ -52,37 +51,38 @@
 <!-- Node1&#45;&gt;Node0 -->
 <g id="edge1" class="edge">
 <title>Node1&#45;&gt;Node0</title>
-<path fill="none" stroke="#191970" d="M149,-128.3353C149,-119.0891 149,-110.0626 149,-101.6311"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-128.4371 149,-138.4371 152.5001,-128.4371 145.5001,-128.4371"/>
+<path fill="none" stroke="#191970" d="M149,-128.258C149,-119.0849 149,-110.1025 149,-101.6953"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-128.2685 149,-138.2685 152.5001,-128.2685 145.5001,-128.2685"/>
 </g>
 <!-- Node2 -->
 <g id="node3" class="node">
 <title>Node2</title>
 <g id="a_node3"><a xlink:href="classtvm_1_1tir_1_1DataTypeLegalizer.html" target="_top" xlink:title="Legalize the data types of expressions to make sure they are consistent with other parts of the progr...">
-<polygon fill="#ffffff" stroke="#000000" points="74.5,-375.5 74.5,-542.5 223.5,-542.5 223.5,-375.5 74.5,-375.5"/>
+<polygon fill="#ffffff" stroke="#000000" points="74.5,-364.5 74.5,-542.5 223.5,-542.5 223.5,-364.5 74.5,-364.5"/>
 <text text-anchor="middle" x="149" y="-530.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
 <polyline fill="none" stroke="#000000" points="74.5,-523.5 223.5,-523.5 "/>
 <text text-anchor="start" x="82.5" y="-511.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
-<polyline fill="none" stroke="#000000" points="74.5,-504.5 223.5,-504.5 "/>
-<text text-anchor="start" x="82.5" y="-492.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="82.5" y="-500.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polyline fill="none" stroke="#000000" points="74.5,-493.5 223.5,-493.5 "/>
 <text text-anchor="start" x="82.5" y="-481.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-470.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-459.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="82.5" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="82.5" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="82.5" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="82.5" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-426.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-415.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-404.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-393.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="82.5" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
+<text text-anchor="start" x="82.5" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="82.5" y="-371.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node1 -->
 <g id="edge2" class="edge">
 <title>Node2&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M149,-365.1586C149,-356.4328 149,-347.5668 149,-338.7858"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-365.324 149,-375.324 152.5001,-365.324 145.5001,-365.324"/>
+<path fill="none" stroke="#191970" d="M149,-354.1939C149,-345.4093 149,-336.531 149,-327.7853"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-354.4498 149,-364.4498 152.5001,-354.4498 145.5001,-354.4498"/>
 </g>
 <!-- Node3 -->
 <g id="node4" class="node">
@@ -100,8 +100,8 @@
 <!-- Node3&#45;&gt;Node2 -->
 <g id="edge3" class="edge">
 <title>Node3&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M149,-569.2025C149,-560.8906 149,-551.8023 149,-542.5245"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-569.2713 149,-579.2713 152.5001,-569.2713 145.5001,-569.2713"/>
+<path fill="none" stroke="#191970" d="M149,-569.039C149,-560.8624 149,-551.9198 149,-542.7458"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-569.3018 149,-579.3018 152.5001,-569.3019 145.5001,-569.3018"/>
 </g>
 <!-- Node4 -->
 <g id="node5" class="node">
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter-members.html b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter-members.html
index 12da0318ce..6b7baeb514 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter-members.html
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter-members.html
@@ -78,7 +78,7 @@ $(function() {
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a4b60203572648ecb12a2aa72a552318d">ivmap_</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a18e378023df3122893ffaf50bb89464e">operator()</a>(Stmt stmt)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">inline</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a0e7deada1e51a604e4f7bc6c8a115955">Parent</a> typedef</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a34ba7e2f7b8a676f84a8fcb37d3c5dba">var_remap_</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1de0d50699d7d15618b19d2cb7fadb35">var_remap_</a></td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#ac3cbdd10c2660208ba65dab805968c79">VisitBlockAnnotations</a>(const Map&lt; String, ObjectRef &gt; &amp;annotations)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a020018fd34983116e16548986da455f9">VisitBuffer</a>(const Buffer &amp;buffer)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#aa44e8eb51377a5329c30cd13410fb4df">VisitBufferRegion</a>(const BufferRegion &amp;region)</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
@@ -91,19 +91,19 @@ $(function() {
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a3f30102c91f36c6cdf97a047fb1f5074">VisitExpr_</a>(const GTNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a20c9bd949c70b95f12c1dff43cee1174">VisitExpr_</a>(const GENode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a006af1f277912d7fdca575e92ac33d1a">VisitExpr_</a>(const CallNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a3c9ea744377f09c9656cc3718eed7bfb">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const SelectNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#abba02eaed632f7b46d078e087f0b4217">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const RampNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#ace95c42561653586686d4e810b3f6760">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const AddNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a7faae5c6746c2911d3c4d82f4b0802cf">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const SubNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1f08a5698666b1a98455ef75c1d8a434">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const MulNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a5bad1d0823b0e51fa7c5dbef34f49833">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const DivNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#ac1e09304dc701e922244e1a4587be114">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const ModNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a622799ab2b39ebdb6087534f4aab6d2f">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const FloorDivNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#ae8bfafc2f1bd2568acefa9b984caeb85">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const FloorModNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#af2a2d31de7d1325c00f923aff28a5904">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const MinNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a69fc10196783329cd24628db8d29cab6">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const MaxNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a263428d455e3991968778c002d2d875c">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const CastNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#af6e722273818f70d0894aeecd1b55615">tvm::tir::StmtExprMutator::VisitExpr_</a>(const VarNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a947d190184b5f3d1e406216909a2cf18">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const VarNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a3c9ea744377f09c9656cc3718eed7bfb">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const SelectNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#abba02eaed632f7b46d078e087f0b4217">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const RampNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#ace95c42561653586686d4e810b3f6760">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const AddNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a7faae5c6746c2911d3c4d82f4b0802cf">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const SubNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1f08a5698666b1a98455ef75c1d8a434">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const MulNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a5bad1d0823b0e51fa7c5dbef34f49833">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const DivNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#ac1e09304dc701e922244e1a4587be114">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const ModNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a622799ab2b39ebdb6087534f4aab6d2f">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const FloorDivNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#ae8bfafc2f1bd2568acefa9b984caeb85">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const FloorModNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#af2a2d31de7d1325c00f923aff28a5904">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const MinNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a69fc10196783329cd24628db8d29cab6">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const MaxNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a263428d455e3991968778c002d2d875c">tvm::tir::DataTypeLegalizer::VisitExpr_</a>(const CastNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#ade08e1786bce8d1cb220d3f54f3fbfeb">tvm::tir::StmtExprMutator::VisitExpr_</a>(const SizeVarNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#af195e0234cba3ed03d36b04a03d3e0e0">tvm::tir::StmtExprMutator::VisitExpr_</a>(const LoadNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a755a2639a6147fb6a608e8904804bb5f">tvm::tir::StmtExprMutator::VisitExpr_</a>(const ProducerLoadNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">tvm::tir::ExprMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
@@ -130,7 +130,7 @@ $(function() {
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#aa6411333a5796b7056a674cc03173d71">VisitStmt_</a>(const AllocateNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a0195f5def23df2aafef01255243390f3">VisitStmt_</a>(const ForNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#aca20c14f1f058ffb3ef6c8bba1fda6f1">tvm::tir::DataTypeLegalizer::VisitStmt_</a>(const AttrStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
-  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a51d66e88ec3779c117bab5ba9406091c">tvm::tir::StmtExprMutator::VisitStmt_</a>(const LetStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
+  <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a87e951cbdf97e52218f21db3fbbfaf38">tvm::tir::DataTypeLegalizer::VisitStmt_</a>(const LetStmtNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a971adb4c88808a67317da73954e093b5">tvm::tir::StmtExprMutator::VisitStmt_</a>(const WhileNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#ade45b66284e1bc514afeb45bfd645b1f">tvm::tir::StmtExprMutator::VisitStmt_</a>(const AllocateConstNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
   <tr class="even"><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a3b116212aaf79bc898f3446a35f7fd3e">tvm::tir::StmtExprMutator::VisitStmt_</a>(const StoreNode *op) override</td><td class="entry"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td><td class="entry"><span class="mlabel">protected</span></td></tr>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter.html b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter.html
index b692d44716..8553a88080 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter.html
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter.html
@@ -85,7 +85,7 @@ Inheritance diagram for tvm::tir::IndexDataTypeRewriter:</div>
 <div class="dynheader">
 Collaboration diagram for tvm::tir::IndexDataTypeRewriter:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1IndexDataTypeRewriter__coll__graph.svg" width="704" height="1355"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1tir_1_1IndexDataTypeRewriter__coll__graph.svg" width="607" height="1355"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <table class="memberdecls">
@@ -147,6 +147,10 @@ Protected Member Functions</h2></td></tr>
 <tr class="separator:acab424302ee742c9759da1696519496a inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a74a17d6246a62100e699dec154ceef72 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a74a17d6246a62100e699dec154ceef72">VisitStmt_</a> (const <a class="el" href="classtvm_1_1tir_1_1BlockNode.html">BlockNode</a> *op) override</td></tr>
 <tr class="separator:a74a17d6246a62100e699dec154ceef72 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a87e951cbdf97e52218f21db3fbbfaf38 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a87e951cbdf97e52218f21db3fbbfaf38">VisitStmt_</a> (const <a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html">LetStmtNode</a> *op) override</td></tr>
+<tr class="separator:a87e951cbdf97e52218f21db3fbbfaf38 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a947d190184b5f3d1e406216909a2cf18 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a947d190184b5f3d1e406216909a2cf18">VisitExpr_</a> (const <a class="el" href="classtvm_1_1tir_1_1VarNode.html">VarNode</a> *op) override</td></tr>
+<tr class="separator:a947d190184b5f3d1e406216909a2cf18 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a3c9ea744377f09c9656cc3718eed7bfb inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a3c9ea744377f09c9656cc3718eed7bfb">VisitExpr_</a> (const <a class="el" href="classtvm_1_1tir_1_1SelectNode.html">SelectNode</a> *op) override</td></tr>
 <tr class="separator:a3c9ea744377f09c9656cc3718eed7bfb inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:abba02eaed632f7b46d078e087f0b4217 inherit pro_methods_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#abba02eaed632f7b46d078e087f0b4217">VisitExpr_</a> (const <a class="el" href="classtvm_1_1tir_1_1RampNode.html">RampNode</a> *op) override</td></tr>
@@ -316,13 +320,13 @@ Protected Attributes</h2></td></tr>
 <tr class="separator:a56fc45d85bdc5ed5c0c22ea938cb8c20"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a734cbb5c4096dc0970a9e29bff1445ea"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a734cbb5c4096dc0970a9e29bff1445ea">is_condition_</a> {false}</td></tr>
 <tr class="separator:a734cbb5c4096dc0970a9e29bff1445ea"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a34ba7e2f7b8a676f84a8fcb37d3c5dba"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1runtime_1_1Map.html">Map</a>&lt; <a class="el" href="classtvm_1_1tir_1_1Var.html">Var</a>, <a class="el" href="classtvm_1_1tir_1_1Var.html">Var</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a34ba7e2f7b8a676f84a8fcb37d3c5dba">var_remap_</a></td></tr>
-<tr class="separator:a34ba7e2f7b8a676f84a8fcb37d3c5dba"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a8ee3de0fbd707b0062c32c0511d53904"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1runtime_1_1Map.html">Map</a>&lt; <a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a>, <a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a8ee3de0fbd707b0062c32c0511d53904">buffer_remap_</a></td></tr>
 <tr class="separator:a8ee3de0fbd707b0062c32c0511d53904"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="inherit_header pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer"><td colspan="2" onclick="javascript:toggleInherit('pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer')"><img src="closed.png" alt="-"/>&#160;Protected Attributes inherited from <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html">tvm::tir::DataTypeLegalizer</a></td></tr>
 <tr class="memitem:a4b60203572648ecb12a2aa72a552318d inherit pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top">std::unordered_map&lt; const <a class="el" href="classtvm_1_1tir_1_1IterVarNode.html">IterVarNode</a> *, <a class="el" href="classtvm_1_1tir_1_1IterVar.html">IterVar</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a4b60203572648ecb12a2aa72a552318d">ivmap_< [...]
 <tr class="separator:a4b60203572648ecb12a2aa72a552318d inherit pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a1de0d50699d7d15618b19d2cb7fadb35 inherit pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memItemLeft" align="right" valign="top">std::unordered_map&lt; const <a class="el" href="classtvm_1_1tir_1_1VarNode.html">VarNode</a> *, <a class="el" href="classtvm_1_1tir_1_1Var.html">Var</a> &gt;&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1de0d50699d7d15618b19d2cb7fadb35">var_remap_</a></td></tr>
+<tr class="separator:a1de0d50699d7d15618b19d2cb7fadb35 inherit pro_attribs_classtvm_1_1tir_1_1DataTypeLegalizer"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="inherit_header pro_attribs_classtvm_1_1tir_1_1StmtMutator"><td colspan="2" onclick="javascript:toggleInherit('pro_attribs_classtvm_1_1tir_1_1StmtMutator')"><img src="closed.png" alt="-"/>&#160;Protected Attributes inherited from <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html">tvm::tir::StmtMutator</a></td></tr>
 <tr class="memitem:a620e6041832441d25ee4f4d65921231f inherit pro_attribs_classtvm_1_1tir_1_1StmtMutator"><td class="memItemLeft" align="right" valign="top">bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a620e6041832441d25ee4f4d65921231f">allow_copy_on_write_</a> {false}</td></tr>
 <tr class="memdesc:a620e6041832441d25ee4f4d65921231f inherit pro_attribs_classtvm_1_1tir_1_1StmtMutator"><td class="mdescLeft">&#160;</td><td class="mdescRight">Internal state to indicate whether copy on write is enabled. COW is enabled iff all the parents of the node are unique.  <a href="classtvm_1_1tir_1_1StmtMutator.html#a620e6041832441d25ee4f4d65921231f">More...</a><br /></td></tr>
@@ -973,28 +977,6 @@ Additional Inherited Members</h2></td></tr>
 </table>
 </div><div class="memdoc">
 
-</div>
-</div>
-<a id="a34ba7e2f7b8a676f84a8fcb37d3c5dba"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a34ba7e2f7b8a676f84a8fcb37d3c5dba">&#9670;&nbsp;</a></span>var_remap_</h2>
-
-<div class="memitem">
-<div class="memproto">
-<table class="mlabels">
-  <tr>
-  <td class="mlabels-left">
-      <table class="memname">
-        <tr>
-          <td class="memname"><a class="el" href="classtvm_1_1runtime_1_1Map.html">Map</a>&lt;<a class="el" href="classtvm_1_1tir_1_1Var.html">Var</a>, <a class="el" href="classtvm_1_1tir_1_1Var.html">Var</a>&gt; tvm::tir::IndexDataTypeRewriter::var_remap_</td>
-        </tr>
-      </table>
-  </td>
-  <td class="mlabels-right">
-<span class="mlabels"><span class="mlabel">protected</span></span>  </td>
-  </tr>
-</table>
-</div><div class="memdoc">
-
 </div>
 </div>
 <hr/>The documentation for this class was generated from the following file:<ul>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter__coll__graph.svg b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter__coll__graph.svg
index 1bd68e4b0a..e0514b619c 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter__coll__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter__coll__graph.svg
@@ -4,79 +4,80 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: tvm::tir::IndexDataTypeRewriter Pages: 1 -->
-<svg width="528pt" height="1016pt"
- viewBox="0.00 0.00 527.50 1016.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="455pt" height="1016pt"
+ viewBox="0.00 0.00 455.00 1016.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 1012)">
 <title>tvm::tir::IndexDataTypeRewriter</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-1012 523.5,-1012 523.5,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-1012 451,-1012 451,4 -4,4"/>
 <!-- Node8 -->
 <g id="node1" class="node">
 <title>Node8</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="242,-.5 242,-178.5 414,-178.5 414,-.5 242,-.5"/>
-<text text-anchor="middle" x="328" y="-166.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
-<polyline fill="none" stroke="#000000" points="242,-159.5 414,-159.5 "/>
-<text text-anchor="start" x="250" y="-147.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
-<text text-anchor="start" x="250" y="-136.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
-<polyline fill="none" stroke="#000000" points="242,-129.5 414,-129.5 "/>
-<text text-anchor="start" x="250" y="-117.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-106.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-95.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-84.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="250" y="-73.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitIndices()</text>
-<text text-anchor="start" x="250" y="-62.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="250" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="250" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="250" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 11 more...</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="213,-.5 213,-178.5 385,-178.5 385,-.5 213,-.5"/>
+<text text-anchor="middle" x="299" y="-166.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
+<polyline fill="none" stroke="#000000" points="213,-159.5 385,-159.5 "/>
+<text text-anchor="start" x="221" y="-147.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
+<text text-anchor="start" x="221" y="-136.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
+<polyline fill="none" stroke="#000000" points="213,-129.5 385,-129.5 "/>
+<text text-anchor="start" x="221" y="-117.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-106.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-95.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-84.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="221" y="-73.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitIndices()</text>
+<text text-anchor="start" x="221" y="-62.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-51.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-40.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="221" y="-29.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="221" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="221" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 11 more...</text>
 </g>
 <!-- Node9 -->
 <g id="node2" class="node">
 <title>Node9</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1DataTypeLegalizer.html" target="_top" xlink:title="Legalize the data types of expressions to make sure they are consistent with other parts of the progr...">
-<polygon fill="#ffffff" stroke="#000000" points="103.5,-237.5 103.5,-404.5 252.5,-404.5 252.5,-237.5 103.5,-237.5"/>
-<text text-anchor="middle" x="178" y="-392.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
-<polyline fill="none" stroke="#000000" points="103.5,-385.5 252.5,-385.5 "/>
-<text text-anchor="start" x="111.5" y="-373.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
-<polyline fill="none" stroke="#000000" points="103.5,-366.5 252.5,-366.5 "/>
-<text text-anchor="start" x="111.5" y="-354.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="111.5" y="-343.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="111.5" y="-332.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="111.5" y="-321.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="111.5" y="-310.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-299.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-288.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-277.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="111.5" y="-244.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
+<polygon fill="#ffffff" stroke="#000000" points="150.5,-232 150.5,-410 299.5,-410 299.5,-232 150.5,-232"/>
+<text text-anchor="middle" x="225" y="-398" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
+<polyline fill="none" stroke="#000000" points="150.5,-391 299.5,-391 "/>
+<text text-anchor="start" x="158.5" y="-379" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
+<text text-anchor="start" x="158.5" y="-368" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polyline fill="none" stroke="#000000" points="150.5,-361 299.5,-361 "/>
+<text text-anchor="start" x="158.5" y="-349" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="158.5" y="-338" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="158.5" y="-327" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="158.5" y="-316" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="158.5" y="-305" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="158.5" y="-294" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="158.5" y="-283" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="158.5" y="-272" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="158.5" y="-261" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="158.5" y="-250" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="158.5" y="-239" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
 </a>
 </g>
 </g>
 <!-- Node9&#45;&gt;Node8 -->
 <g id="edge1" class="edge">
 <title>Node9&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M237.6959,-228.8693C248.3794,-212.3811 259.4956,-195.2251 270.1268,-178.8177"/>
-<polygon fill="none" stroke="#191970" points="234.654,-227.1276 232.1534,-237.4232 240.5286,-230.9341 234.654,-227.1276"/>
+<path fill="none" stroke="#191970" d="M256.6672,-221.933C261.2667,-207.544 265.9785,-192.8038 270.5102,-178.6269"/>
+<polygon fill="none" stroke="#191970" points="253.2155,-221.2363 253.5045,-231.8272 259.8831,-223.3677 253.2155,-221.2363"/>
 </g>
 <!-- Node10 -->
 <g id="node3" class="node">
 <title>Node10</title>
 <g id="a_node3"><a xlink:href="classtvm_1_1tir_1_1StmtExprMutator.html" target="_top" xlink:title="Mutator that recursively mutates stmts and exprs on them. ">
-<polygon fill="#ffffff" stroke="#000000" points="107.5,-536 107.5,-593 248.5,-593 248.5,-536 107.5,-536"/>
-<text text-anchor="middle" x="178" y="-581" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtExprMutator</text>
-<polyline fill="none" stroke="#000000" points="107.5,-574 248.5,-574 "/>
-<text text-anchor="middle" x="178" y="-562" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="107.5,-555 248.5,-555 "/>
-<text text-anchor="start" x="115.5" y="-543" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr()</text>
+<polygon fill="#ffffff" stroke="#000000" points="150.5,-536 150.5,-593 291.5,-593 291.5,-536 150.5,-536"/>
+<text text-anchor="middle" x="221" y="-581" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::StmtExprMutator</text>
+<polyline fill="none" stroke="#000000" points="150.5,-574 291.5,-574 "/>
+<text text-anchor="middle" x="221" y="-562" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="150.5,-555 291.5,-555 "/>
+<text text-anchor="start" x="158.5" y="-543" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr()</text>
 </a>
 </g>
 </g>
 <!-- Node10&#45;&gt;Node9 -->
 <g id="edge2" class="edge">
 <title>Node10&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M178,-525.1966C178,-492.7294 178,-445.5084 178,-404.6525"/>
-<polygon fill="none" stroke="#191970" points="174.5001,-525.6111 178,-535.6111 181.5001,-525.6112 174.5001,-525.6111"/>
+<path fill="none" stroke="#191970" d="M221.6452,-525.2215C222.1553,-494.1704 222.8873,-449.6131 223.5374,-410.037"/>
+<polygon fill="none" stroke="#191970" points="218.1394,-525.555 221.4746,-535.6111 225.1384,-525.67 218.1394,-525.555"/>
 </g>
 <!-- Node11 -->
 <g id="node4" class="node">
@@ -105,8 +106,8 @@
 <!-- Node11&#45;&gt;Node10 -->
 <g id="edge3" class="edge">
 <title>Node11&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M113.453,-714.1042C132.2313,-670.5807 152.9081,-622.6569 165.6756,-593.0651"/>
-<polygon fill="none" stroke="#191970" points="110.2262,-712.7483 109.4783,-723.3167 116.6535,-715.5214 110.2262,-712.7483"/>
+<path fill="none" stroke="#191970" d="M130.1833,-714.6503C156.5955,-670.9821 185.7495,-622.7808 203.7227,-593.0651"/>
+<polygon fill="none" stroke="#191970" points="127.1221,-712.9487 124.9415,-723.3167 133.1118,-716.5715 127.1221,-712.9487"/>
 </g>
 <!-- Node12 -->
 <g id="node5" class="node">
@@ -154,8 +155,8 @@
 <!-- Node13&#45;&gt;Node10 -->
 <g id="edge5" class="edge">
 <title>Node13&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M209.1208,-718.8594C200.1178,-674.2044 189.9863,-623.9523 183.7984,-593.2603"/>
-<polygon fill="none" stroke="#191970" points="205.7346,-719.7733 211.142,-728.8843 212.5965,-718.3897 205.7346,-719.7733"/>
+<path fill="none" stroke="#191970" d="M225.3569,-718.8594C224.0965,-674.2044 222.6781,-623.9523 221.8118,-593.2603"/>
+<polygon fill="none" stroke="#191970" points="221.859,-718.987 225.6399,-728.8843 228.8562,-718.7895 221.859,-718.987"/>
 </g>
 <!-- Node14 -->
 <g id="node7" class="node">
@@ -181,136 +182,98 @@
 <g id="node8" class="node">
 <title>Node15</title>
 <g id="a_node8"><a xlink:href="classtvm_1_1runtime_1_1Map.html" target="_top" xlink:title="{tvm::runtime::Map\&lt;\l tvm::tir::Buffer, tvm\l::tir::Buffer \&gt;\n||+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ operator=()\l+ operator=()\l+ at()\land 12 more...\l}">
-<polygon fill="#ffffff" stroke="#000000" points="270,-226.5 270,-415.5 386,-415.5 386,-226.5 270,-226.5"/>
-<text text-anchor="start" x="278" y="-403.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Map&lt;</text>
-<text text-anchor="start" x="278" y="-392.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> tvm::tir::Buffer, tvm</text>
-<text text-anchor="middle" x="328" y="-381.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::tir::Buffer &gt;</text>
-<polyline fill="none" stroke="#000000" points="270,-374.5 386,-374.5 "/>
-<text text-anchor="middle" x="328" y="-362.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="270,-355.5 386,-355.5 "/>
-<text text-anchor="start" x="278" y="-343.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-332.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-321.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-310.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-299.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-288.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-277.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="278" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
-<text text-anchor="start" x="278" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
-<text text-anchor="start" x="278" y="-244.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ at()</text>
-<text text-anchor="start" x="278" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 12 more...</text>
+<polygon fill="#ffffff" stroke="#000000" points="318,-226.5 318,-415.5 434,-415.5 434,-226.5 318,-226.5"/>
+<text text-anchor="start" x="326" y="-403.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Map&lt;</text>
+<text text-anchor="start" x="326" y="-392.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> tvm::tir::Buffer, tvm</text>
+<text text-anchor="middle" x="376" y="-381.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::tir::Buffer &gt;</text>
+<polyline fill="none" stroke="#000000" points="318,-374.5 434,-374.5 "/>
+<text text-anchor="middle" x="376" y="-362.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="318,-355.5 434,-355.5 "/>
+<text text-anchor="start" x="326" y="-343.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-332.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-321.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-310.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-299.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-288.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-277.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
+<text text-anchor="start" x="326" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
+<text text-anchor="start" x="326" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
+<text text-anchor="start" x="326" y="-244.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ at()</text>
+<text text-anchor="start" x="326" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 12 more...</text>
 </a>
 </g>
 </g>
 <!-- Node15&#45;&gt;Node8 -->
 <g id="edge7" class="edge">
 <title>Node15&#45;&gt;Node8</title>
-<path fill="none" stroke="#404040" d="M328,-226.1721C328,-214.4951 328,-202.5243 328,-190.7564"/>
-<polygon fill="none" stroke="#404040" points="328.0001,-190.5875 324,-184.5875 328,-178.5875 332,-184.5874 328.0001,-190.5875"/>
-<text text-anchor="middle" x="369" y="-200" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #buffer_remap_</text>
+<path fill="none" stroke="#404040" d="M344.459,-226.1721C340.4933,-214.2493 336.4257,-202.0201 332.4321,-190.0135"/>
+<polygon fill="none" stroke="#404040" points="332.4191,-189.9741 326.7298,-185.5433 328.6317,-178.5875 334.3209,-183.0183 332.4191,-189.9741"/>
+<text text-anchor="middle" x="379" y="-200" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #buffer_remap_</text>
 </g>
 <!-- Node16 -->
 <g id="node9" class="node">
 <title>Node16</title>
 <g id="a_node9"><a xlink:href="classtvm_1_1runtime_1_1ObjectRef.html" target="_top" xlink:title="Base class of all object reference. ">
-<polygon fill="#ffffff" stroke="#000000" points="315,-453.5 315,-675.5 449,-675.5 449,-453.5 315,-453.5"/>
-<text text-anchor="middle" x="382" y="-663.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectRef</text>
-<polyline fill="none" stroke="#000000" points="315,-656.5 449,-656.5 "/>
-<text text-anchor="start" x="323" y="-644.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_is_nullable</text>
-<polyline fill="none" stroke="#000000" points="315,-637.5 449,-637.5 "/>
-<text text-anchor="start" x="323" y="-625.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectRef()</text>
-<text text-anchor="start" x="323" y="-614.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectRef()</text>
-<text text-anchor="start" x="323" y="-603.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ same_as()</text>
-<text text-anchor="start" x="323" y="-592.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator==()</text>
-<text text-anchor="start" x="323" y="-581.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator!=()</text>
-<text text-anchor="start" x="323" y="-570.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&lt;()</text>
-<text text-anchor="start" x="323" y="-559.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ defined()</text>
-<text text-anchor="start" x="323" y="-548.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ get()</text>
-<text text-anchor="start" x="323" y="-537.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&#45;&gt;()</text>
-<text text-anchor="start" x="323" y="-526.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ unique()</text>
-<text text-anchor="start" x="323" y="-515.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ use_count()</text>
-<text text-anchor="start" x="323" y="-504.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ as()</text>
-<text text-anchor="start" x="323" y="-493.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># get_mutable()</text>
-<text text-anchor="start" x="323" y="-482.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># DowncastNoCheck()</text>
-<text text-anchor="start" x="323" y="-471.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># FFIClearAfterMove()</text>
-<text text-anchor="start" x="323" y="-460.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># GetDataPtr()</text>
+<polygon fill="#ffffff" stroke="#000000" points="310,-453.5 310,-675.5 444,-675.5 444,-453.5 310,-453.5"/>
+<text text-anchor="middle" x="377" y="-663.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectRef</text>
+<polyline fill="none" stroke="#000000" points="310,-656.5 444,-656.5 "/>
+<text text-anchor="start" x="318" y="-644.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ _type_is_nullable</text>
+<polyline fill="none" stroke="#000000" points="310,-637.5 444,-637.5 "/>
+<text text-anchor="start" x="318" y="-625.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectRef()</text>
+<text text-anchor="start" x="318" y="-614.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectRef()</text>
+<text text-anchor="start" x="318" y="-603.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ same_as()</text>
+<text text-anchor="start" x="318" y="-592.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator==()</text>
+<text text-anchor="start" x="318" y="-581.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator!=()</text>
+<text text-anchor="start" x="318" y="-570.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&lt;()</text>
+<text text-anchor="start" x="318" y="-559.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ defined()</text>
+<text text-anchor="start" x="318" y="-548.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ get()</text>
+<text text-anchor="start" x="318" y="-537.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&#45;&gt;()</text>
+<text text-anchor="start" x="318" y="-526.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ unique()</text>
+<text text-anchor="start" x="318" y="-515.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ use_count()</text>
+<text text-anchor="start" x="318" y="-504.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ as()</text>
+<text text-anchor="start" x="318" y="-493.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># get_mutable()</text>
+<text text-anchor="start" x="318" y="-482.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># DowncastNoCheck()</text>
+<text text-anchor="start" x="318" y="-471.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># FFIClearAfterMove()</text>
+<text text-anchor="start" x="318" y="-460.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># GetDataPtr()</text>
 </a>
 </g>
 </g>
 <!-- Node16&#45;&gt;Node15 -->
 <g id="edge8" class="edge">
 <title>Node16&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M355.1926,-443.6186C353.1004,-434.184 351.0065,-424.7425 348.9614,-415.5206"/>
-<polygon fill="none" stroke="#191970" points="351.7882,-444.4334 357.3703,-453.4384 358.6222,-442.9178 351.7882,-444.4334"/>
-</g>
-<!-- Node18 -->
-<g id="node11" class="node">
-<title>Node18</title>
-<g id="a_node11"><a xlink:href="classtvm_1_1runtime_1_1Map.html" target="_top" xlink:title="{tvm::runtime::Map\&lt;\l tvm::tir::Var, tvm\l::tir::Var \&gt;\n||+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ Map()\l+ operator=()\l+ operator=()\l+ at()\land 12 more...\l}">
-<polygon fill="#ffffff" stroke="#000000" points="404.5,-226.5 404.5,-415.5 519.5,-415.5 519.5,-226.5 404.5,-226.5"/>
-<text text-anchor="start" x="412.5" y="-403.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Map&lt;</text>
-<text text-anchor="start" x="412.5" y="-392.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> tvm::tir::Var, tvm</text>
-<text text-anchor="middle" x="462" y="-381.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::tir::Var &gt;</text>
-<polyline fill="none" stroke="#000000" points="404.5,-374.5 519.5,-374.5 "/>
-<text text-anchor="middle" x="462" y="-362.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="404.5,-355.5 519.5,-355.5 "/>
-<text text-anchor="start" x="412.5" y="-343.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-332.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-321.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-310.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-299.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-288.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-277.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ Map()</text>
-<text text-anchor="start" x="412.5" y="-266.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
-<text text-anchor="start" x="412.5" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator=()</text>
-<text text-anchor="start" x="412.5" y="-244.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ at()</text>
-<text text-anchor="start" x="412.5" y="-233.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 12 more...</text>
-</a>
-</g>
-</g>
-<!-- Node16&#45;&gt;Node18 -->
-<g id="edge11" class="edge">
-<title>Node16&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M421.6167,-443.9165C424.749,-434.3828 427.8843,-424.8395 430.946,-415.5206"/>
-<polygon fill="none" stroke="#191970" points="418.2846,-442.8456 418.4884,-453.4384 424.9349,-445.0305 418.2846,-442.8456"/>
+<path fill="none" stroke="#191970" d="M376.5023,-443.3207C376.464,-433.9853 376.4256,-424.6454 376.3882,-415.5206"/>
+<polygon fill="none" stroke="#191970" points="373.0028,-443.4529 376.5439,-453.4384 380.0027,-443.4241 373.0028,-443.4529"/>
 </g>
 <!-- Node17 -->
 <g id="node10" class="node">
 <title>Node17</title>
 <g id="a_node10"><a xlink:href="classtvm_1_1runtime_1_1ObjectPtr.html" target="_top" xlink:title="{tvm::runtime::ObjectPtr\l\&lt; tvm::runtime::Object \&gt;\n||+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ~ObjectPtr()\l+ swap()\l+ get()\l+ operator&#45;\&gt;()\land 11 more...\l}">
-<polygon fill="#ffffff" stroke="#000000" points="312,-723.5 312,-901.5 452,-901.5 452,-723.5 312,-723.5"/>
-<text text-anchor="start" x="320" y="-889.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectPtr</text>
-<text text-anchor="middle" x="382" y="-878.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::runtime::Object &gt;</text>
-<polyline fill="none" stroke="#000000" points="312,-871.5 452,-871.5 "/>
-<text text-anchor="middle" x="382" y="-859.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
-<polyline fill="none" stroke="#000000" points="312,-852.5 452,-852.5 "/>
-<text text-anchor="start" x="320" y="-840.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-829.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-818.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-807.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-796.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-785.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-774.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ~ObjectPtr()</text>
-<text text-anchor="start" x="320" y="-763.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ swap()</text>
-<text text-anchor="start" x="320" y="-752.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ get()</text>
-<text text-anchor="start" x="320" y="-741.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&#45;&gt;()</text>
-<text text-anchor="start" x="320" y="-730.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 11 more...</text>
+<polygon fill="#ffffff" stroke="#000000" points="307,-723.5 307,-901.5 447,-901.5 447,-723.5 307,-723.5"/>
+<text text-anchor="start" x="315" y="-889.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectPtr</text>
+<text text-anchor="middle" x="377" y="-878.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::runtime::Object &gt;</text>
+<polyline fill="none" stroke="#000000" points="307,-871.5 447,-871.5 "/>
+<text text-anchor="middle" x="377" y="-859.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> </text>
+<polyline fill="none" stroke="#000000" points="307,-852.5 447,-852.5 "/>
+<text text-anchor="start" x="315" y="-840.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-829.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-818.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-807.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-796.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-785.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-774.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ~ObjectPtr()</text>
+<text text-anchor="start" x="315" y="-763.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ swap()</text>
+<text text-anchor="start" x="315" y="-752.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ get()</text>
+<text text-anchor="start" x="315" y="-741.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ operator&#45;&gt;()</text>
+<text text-anchor="start" x="315" y="-730.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 11 more...</text>
 </a>
 </g>
 </g>
 <!-- Node17&#45;&gt;Node16 -->
 <g id="edge9" class="edge">
 <title>Node17&#45;&gt;Node16</title>
-<path fill="none" stroke="#404040" d="M382,-723.3167C382,-711.8765 382,-700.0062 382,-688.1402"/>
-<polygon fill="none" stroke="#404040" points="382.0001,-687.7944 378,-681.7944 382,-675.7944 386,-681.7943 382.0001,-687.7944"/>
-<text text-anchor="middle" x="401.5" y="-697" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #data_</text>
-</g>
-<!-- Node18&#45;&gt;Node8 -->
-<g id="edge10" class="edge">
-<title>Node18&#45;&gt;Node8</title>
-<path fill="none" stroke="#404040" d="M428.9298,-226.4813C424.3444,-216.32 419.3526,-206.3045 414,-197 412.4618,-194.3262 410.8602,-191.6488 409.2054,-188.9749"/>
-<polygon fill="none" stroke="#404040" points="409.0806,-188.7812 402.4682,-185.9042 402.5806,-178.694 409.193,-181.5709 409.0806,-188.7812"/>
-<text text-anchor="middle" x="454" y="-200" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #var_remap_</text>
+<path fill="none" stroke="#404040" d="M377,-723.3167C377,-711.8765 377,-700.0062 377,-688.1402"/>
+<polygon fill="none" stroke="#404040" points="377.0001,-687.7944 373,-681.7944 377,-675.7944 381,-681.7943 377.0001,-687.7944"/>
+<text text-anchor="middle" x="396.5" y="-697" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #data_</text>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter__inherit__graph.svg b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter__inherit__graph.svg
index a2777c5444..0481c8d427 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter__inherit__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1IndexDataTypeRewriter__inherit__graph.svg
@@ -12,12 +12,11 @@
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="63,-138.5 63,-338.5 235,-338.5 235,-138.5 63,-138.5"/>
-<text text-anchor="middle" x="149" y="-326.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
-<polyline fill="none" stroke="#000000" points="63,-319.5 235,-319.5 "/>
-<text text-anchor="start" x="71" y="-307.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
-<text text-anchor="start" x="71" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
-<text text-anchor="start" x="71" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="63,-138.5 63,-327.5 235,-327.5 235,-138.5 63,-138.5"/>
+<text text-anchor="middle" x="149" y="-315.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
+<polyline fill="none" stroke="#000000" points="63,-308.5 235,-308.5 "/>
+<text text-anchor="start" x="71" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
+<text text-anchor="start" x="71" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
 <text text-anchor="start" x="71" y="-274.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># buffer_remap_</text>
 <polyline fill="none" stroke="#000000" points="63,-267.5 235,-267.5 "/>
 <text text-anchor="start" x="71" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
@@ -52,37 +51,38 @@
 <!-- Node0&#45;&gt;Node7 -->
 <g id="edge7" class="edge">
 <title>Node0&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M149,-128.3353C149,-119.0891 149,-110.0626 149,-101.6311"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-128.4371 149,-138.4371 152.5001,-128.4371 145.5001,-128.4371"/>
+<path fill="none" stroke="#191970" d="M149,-128.258C149,-119.0849 149,-110.1025 149,-101.6953"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-128.2685 149,-138.2685 152.5001,-128.2685 145.5001,-128.2685"/>
 </g>
 <!-- Node1 -->
 <g id="node2" class="node">
 <title>Node1</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1tir_1_1DataTypeLegalizer.html" target="_top" xlink:title="Legalize the data types of expressions to make sure they are consistent with other parts of the progr...">
-<polygon fill="#ffffff" stroke="#000000" points="74.5,-375.5 74.5,-542.5 223.5,-542.5 223.5,-375.5 74.5,-375.5"/>
+<polygon fill="#ffffff" stroke="#000000" points="74.5,-364.5 74.5,-542.5 223.5,-542.5 223.5,-364.5 74.5,-364.5"/>
 <text text-anchor="middle" x="149" y="-530.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
 <polyline fill="none" stroke="#000000" points="74.5,-523.5 223.5,-523.5 "/>
 <text text-anchor="start" x="82.5" y="-511.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
-<polyline fill="none" stroke="#000000" points="74.5,-504.5 223.5,-504.5 "/>
-<text text-anchor="start" x="82.5" y="-492.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="82.5" y="-500.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polyline fill="none" stroke="#000000" points="74.5,-493.5 223.5,-493.5 "/>
 <text text-anchor="start" x="82.5" y="-481.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-470.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-459.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="82.5" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="82.5" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="82.5" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="82.5" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-426.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-415.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-404.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-393.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="82.5" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
+<text text-anchor="start" x="82.5" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="82.5" y="-371.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node0 -->
 <g id="edge1" class="edge">
 <title>Node1&#45;&gt;Node0</title>
-<path fill="none" stroke="#191970" d="M149,-365.1586C149,-356.4328 149,-347.5668 149,-338.7858"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-365.324 149,-375.324 152.5001,-365.324 145.5001,-365.324"/>
+<path fill="none" stroke="#191970" d="M149,-354.1939C149,-345.4093 149,-336.531 149,-327.7853"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-354.4498 149,-364.4498 152.5001,-354.4498 145.5001,-354.4498"/>
 </g>
 <!-- Node2 -->
 <g id="node3" class="node">
@@ -100,8 +100,8 @@
 <!-- Node2&#45;&gt;Node1 -->
 <g id="edge2" class="edge">
 <title>Node2&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M149,-569.2025C149,-560.8906 149,-551.8023 149,-542.5245"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-569.2713 149,-579.2713 152.5001,-569.2713 145.5001,-569.2713"/>
+<path fill="none" stroke="#191970" d="M149,-569.039C149,-560.8624 149,-551.9198 149,-542.7458"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-569.3018 149,-579.3018 152.5001,-569.3019 145.5001,-569.3018"/>
 </g>
 <!-- Node3 -->
 <g id="node4" class="node">
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1StmtExprMutator__inherit__graph.svg b/docs/reference/api/doxygen/classtvm_1_1tir_1_1StmtExprMutator__inherit__graph.svg
index db85a28ed0..81d51dd409 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1StmtExprMutator__inherit__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1StmtExprMutator__inherit__graph.svg
@@ -23,30 +23,31 @@
 <g id="node6" class="node">
 <title>Node5</title>
 <g id="a_node6"><a xlink:href="classtvm_1_1tir_1_1DataTypeLegalizer.html" target="_top" xlink:title="Legalize the data types of expressions to make sure they are consistent with other parts of the progr...">
-<polygon fill="#ffffff" stroke="#000000" points="74.5,-375.5 74.5,-542.5 223.5,-542.5 223.5,-375.5 74.5,-375.5"/>
+<polygon fill="#ffffff" stroke="#000000" points="74.5,-364.5 74.5,-542.5 223.5,-542.5 223.5,-364.5 74.5,-364.5"/>
 <text text-anchor="middle" x="149" y="-530.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
 <polyline fill="none" stroke="#000000" points="74.5,-523.5 223.5,-523.5 "/>
 <text text-anchor="start" x="82.5" y="-511.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
-<polyline fill="none" stroke="#000000" points="74.5,-504.5 223.5,-504.5 "/>
-<text text-anchor="start" x="82.5" y="-492.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="82.5" y="-500.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polyline fill="none" stroke="#000000" points="74.5,-493.5 223.5,-493.5 "/>
 <text text-anchor="start" x="82.5" y="-481.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-470.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-459.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="82.5" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="82.5" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="82.5" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="82.5" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="82.5" y="-426.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-415.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-404.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="82.5" y="-393.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="82.5" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
+<text text-anchor="start" x="82.5" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="82.5" y="-371.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node5 -->
 <g id="edge5" class="edge">
 <title>Node0&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M149,-569.2025C149,-560.8906 149,-551.8023 149,-542.5245"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-569.2713 149,-579.2713 152.5001,-569.2713 145.5001,-569.2713"/>
+<path fill="none" stroke="#191970" d="M149,-569.039C149,-560.8624 149,-551.9198 149,-542.7458"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-569.3018 149,-579.3018 152.5001,-569.3019 145.5001,-569.3018"/>
 </g>
 <!-- Node1 -->
 <g id="node2" class="node">
@@ -151,12 +152,11 @@
 <g id="node7" class="node">
 <title>Node6</title>
 <g id="a_node7"><a xlink:href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html" target="_top" xlink:title="Data type rewriter for buffer indices. ">
-<polygon fill="#ffffff" stroke="#000000" points="63,-138.5 63,-338.5 235,-338.5 235,-138.5 63,-138.5"/>
-<text text-anchor="middle" x="149" y="-326.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
-<polyline fill="none" stroke="#000000" points="63,-319.5 235,-319.5 "/>
-<text text-anchor="start" x="71" y="-307.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
-<text text-anchor="start" x="71" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
-<text text-anchor="start" x="71" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polygon fill="#ffffff" stroke="#000000" points="63,-138.5 63,-327.5 235,-327.5 235,-138.5 63,-138.5"/>
+<text text-anchor="middle" x="149" y="-315.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
+<polyline fill="none" stroke="#000000" points="63,-308.5 235,-308.5 "/>
+<text text-anchor="start" x="71" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
+<text text-anchor="start" x="71" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
 <text text-anchor="start" x="71" y="-274.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># buffer_remap_</text>
 <polyline fill="none" stroke="#000000" points="63,-267.5 235,-267.5 "/>
 <text text-anchor="start" x="71" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
@@ -176,8 +176,8 @@
 <!-- Node5&#45;&gt;Node6 -->
 <g id="edge6" class="edge">
 <title>Node5&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M149,-365.1586C149,-356.4328 149,-347.5668 149,-338.7858"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-365.324 149,-375.324 152.5001,-365.324 145.5001,-365.324"/>
+<path fill="none" stroke="#191970" d="M149,-354.1939C149,-345.4093 149,-336.531 149,-327.7853"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-354.4498 149,-364.4498 152.5001,-354.4498 145.5001,-354.4498"/>
 </g>
 <!-- Node7 -->
 <g id="node8" class="node">
@@ -199,8 +199,8 @@
 <!-- Node6&#45;&gt;Node7 -->
 <g id="edge7" class="edge">
 <title>Node6&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M149,-128.3353C149,-119.0891 149,-110.0626 149,-101.6311"/>
-<polygon fill="none" stroke="#191970" points="145.5001,-128.4371 149,-138.4371 152.5001,-128.4371 145.5001,-128.4371"/>
+<path fill="none" stroke="#191970" d="M149,-128.258C149,-119.0849 149,-110.1025 149,-101.6953"/>
+<polygon fill="none" stroke="#191970" points="145.5001,-128.2685 149,-138.2685 152.5001,-128.2685 145.5001,-128.2685"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/classtvm_1_1tir_1_1StmtMutator__inherit__graph.svg b/docs/reference/api/doxygen/classtvm_1_1tir_1_1StmtMutator__inherit__graph.svg
index 04552bb21d..8e0e4883e9 100644
--- a/docs/reference/api/doxygen/classtvm_1_1tir_1_1StmtMutator__inherit__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1tir_1_1StmtMutator__inherit__graph.svg
@@ -73,41 +73,41 @@
 <g id="node4" class="node">
 <title>Node3</title>
 <g id="a_node4"><a xlink:href="classtvm_1_1tir_1_1DataTypeLegalizer.html" target="_top" xlink:title="Legalize the data types of expressions to make sure they are consistent with other parts of the progr...">
-<polygon fill="#ffffff" stroke="#000000" points="18,-375.5 18,-542.5 167,-542.5 167,-375.5 18,-375.5"/>
+<polygon fill="#ffffff" stroke="#000000" points="18,-364.5 18,-542.5 167,-542.5 167,-364.5 18,-364.5"/>
 <text text-anchor="middle" x="92.5" y="-530.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::DataTypeLegalizer</text>
 <polyline fill="none" stroke="#000000" points="18,-523.5 167,-523.5 "/>
 <text text-anchor="start" x="26" y="-511.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># ivmap_</text>
-<polyline fill="none" stroke="#000000" points="18,-504.5 167,-504.5 "/>
-<text text-anchor="start" x="26" y="-492.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="26" y="-500.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polyline fill="none" stroke="#000000" points="18,-493.5 167,-493.5 "/>
 <text text-anchor="start" x="26" y="-481.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="26" y="-470.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="26" y="-459.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
-<text text-anchor="start" x="26" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="26" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="26" y="-448.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
+<text text-anchor="start" x="26" y="-437.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
 <text text-anchor="start" x="26" y="-426.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="26" y="-415.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="26" y="-404.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
 <text text-anchor="start" x="26" y="-393.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
-<text text-anchor="start" x="26" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 13 more...</text>
+<text text-anchor="start" x="26" y="-382.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitExpr_()</text>
+<text text-anchor="start" x="26" y="-371.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">and 15 more...</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node3 -->
 <g id="edge3" class="edge">
 <title>Node2&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M92.5,-569.2025C92.5,-560.8906 92.5,-551.8023 92.5,-542.5245"/>
-<polygon fill="none" stroke="#191970" points="89.0001,-569.2713 92.5,-579.2713 96.0001,-569.2713 89.0001,-569.2713"/>
+<path fill="none" stroke="#191970" d="M92.5,-569.039C92.5,-560.8624 92.5,-551.9198 92.5,-542.7458"/>
+<polygon fill="none" stroke="#191970" points="89.0001,-569.3018 92.5,-579.3018 96.0001,-569.3019 89.0001,-569.3018"/>
 </g>
 <!-- Node4 -->
 <g id="node5" class="node">
 <title>Node4</title>
 <g id="a_node5"><a xlink:href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html" target="_top" xlink:title="Data type rewriter for buffer indices. ">
-<polygon fill="#ffffff" stroke="#000000" points="6.5,-138.5 6.5,-338.5 178.5,-338.5 178.5,-138.5 6.5,-138.5"/>
-<text text-anchor="middle" x="92.5" y="-326.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
-<polyline fill="none" stroke="#000000" points="6.5,-319.5 178.5,-319.5 "/>
-<text text-anchor="start" x="14.5" y="-307.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
-<text text-anchor="start" x="14.5" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
-<text text-anchor="start" x="14.5" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># var_remap_</text>
+<polygon fill="#ffffff" stroke="#000000" points="6.5,-138.5 6.5,-327.5 178.5,-327.5 178.5,-138.5 6.5,-138.5"/>
+<text text-anchor="middle" x="92.5" y="-315.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::tir::IndexDataTypeRewriter</text>
+<polyline fill="none" stroke="#000000" points="6.5,-308.5 178.5,-308.5 "/>
+<text text-anchor="start" x="14.5" y="-296.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_enabled_</text>
+<text text-anchor="start" x="14.5" y="-285.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># is_condition_</text>
 <text text-anchor="start" x="14.5" y="-274.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># buffer_remap_</text>
 <polyline fill="none" stroke="#000000" points="6.5,-267.5 178.5,-267.5 "/>
 <text text-anchor="start" x="14.5" y="-255.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># VisitStmt_()</text>
@@ -127,8 +127,8 @@
 <!-- Node3&#45;&gt;Node4 -->
 <g id="edge4" class="edge">
 <title>Node3&#45;&gt;Node4</title>
-<path fill="none" stroke="#191970" d="M92.5,-365.1586C92.5,-356.4328 92.5,-347.5668 92.5,-338.7858"/>
-<polygon fill="none" stroke="#191970" points="89.0001,-365.324 92.5,-375.324 96.0001,-365.324 89.0001,-365.324"/>
+<path fill="none" stroke="#191970" d="M92.5,-354.1939C92.5,-345.4093 92.5,-336.531 92.5,-327.7853"/>
+<polygon fill="none" stroke="#191970" points="89.0001,-354.4498 92.5,-364.4498 96.0001,-354.4498 89.0001,-354.4498"/>
 </g>
 <!-- Node5 -->
 <g id="node6" class="node">
@@ -150,8 +150,8 @@
 <!-- Node4&#45;&gt;Node5 -->
 <g id="edge5" class="edge">
 <title>Node4&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M92.5,-128.3353C92.5,-119.0891 92.5,-110.0626 92.5,-101.6311"/>
-<polygon fill="none" stroke="#191970" points="89.0001,-128.4371 92.5,-138.4371 96.0001,-128.4371 89.0001,-128.4371"/>
+<path fill="none" stroke="#191970" d="M92.5,-128.258C92.5,-119.0849 92.5,-110.1025 92.5,-101.6953"/>
+<polygon fill="none" stroke="#191970" points="89.0001,-128.2685 92.5,-138.2685 96.0001,-128.2685 89.0001,-128.2685"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/data__type__rewriter_8h_source.html b/docs/reference/api/doxygen/data__type__rewriter_8h_source.html
index 6c9c9e0fb8..881cabf9fd 100644
--- a/docs/reference/api/doxygen/data__type__rewriter_8h_source.html
+++ b/docs/reference/api/doxygen/data__type__rewriter_8h_source.html
@@ -66,13 +66,12 @@ $(function() {
 <div class="title">data_type_rewriter.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="data__type__rewriter_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment">  [...]
+<a href="data__type__rewriter_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment">  [...]
 <div class="ttc" id="classtvm_1_1tir_1_1DeclBufferNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1DeclBufferNode.html">tvm::tir::DeclBufferNode</a></div><div class="ttdoc">Declare a buffer that can be used in the body. </div><div class="ttdef"><b>Definition:</b> stmt.h:692</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BlockRealizeNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1BlockRealizeNode.html">tvm::tir::BlockRealizeNode</a></div><div class="ttdoc">A block realization node represents execution of the block at the binding values. ...</div><div class="ttdef"><b>Definition:</b> stmt.h:1325</div></div>
-<div class="ttc" id="classtvm_1_1tir_1_1DataTypeLegalizer_html_a3c9ea744377f09c9656cc3718eed7bfb"><div class="ttname"><a href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a3c9ea744377f09c9656cc3718eed7bfb">tvm::tir::DataTypeLegalizer::VisitExpr_</a></div><div class="ttdeci">PrimExpr VisitExpr_(const SelectNode *op) override</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
-<div class="ttc" id="classtvm_1_1tir_1_1IndexDataTypeNormalizer_html"><div class="ttname"><a href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html">tvm::tir::IndexDataTypeNormalizer</a></div><div class="ttdoc">Normalize the data types of buffer shapes and indices to the same data type. </div><div class="ttdef"><b>Definition:</b> data_type_rewriter.h:137</div></div>
-<div class="ttc" id="classtvm_1_1tir_1_1IndexDataTypeRewriter_html"><div class="ttname"><a href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></div><div class="ttdoc">Data type rewriter for buffer indices. </div><div class="ttdef"><b>Definition:</b> data_type_rewriter.h:93</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1IndexDataTypeNormalizer_html"><div class="ttname"><a href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html">tvm::tir::IndexDataTypeNormalizer</a></div><div class="ttdoc">Normalize the data types of buffer shapes and indices to the same data type. </div><div class="ttdef"><b>Definition:</b> data_type_rewriter.h:140</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1IndexDataTypeRewriter_html"><div class="ttname"><a href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html">tvm::tir::IndexDataTypeRewriter</a></div><div class="ttdoc">Data type rewriter for buffer indices. </div><div class="ttdef"><b>Definition:</b> data_type_rewriter.h:97</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1IfThenElseNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1IfThenElseNode.html">tvm::tir::IfThenElseNode</a></div><div class="ttdoc">IfThenElse statment. </div><div class="ttdef"><b>Definition:</b> stmt.h:828</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1IterVar_html"><div class="ttname"><a href="classtvm_1_1tir_1_1IterVar.html">tvm::tir::IterVar</a></div><div class="ttdoc">Iteration Variable, represents an iteration over an integer interval. </div><div class="ttdef"><b>Definition:</b> var.h:301</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1VarNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1VarNode.html">tvm::tir::VarNode</a></div><div class="ttdoc">A variable node in the IR. </div><div class="ttdef"><b>Definition:</b> var.h:47</div></div>
@@ -93,7 +92,6 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1DataTypeLegalizer_html_abd51f9d25b5e2419034f028b17aded2a"><div class="ttname"><a href="classtvm_1_1tir_1_1DataTypeLegalizer.html#abd51f9d25b5e2419034f028b17aded2a">tvm::tir::DataTypeLegalizer::VisitStmt_</a></div><div class="ttdeci">Stmt VisitStmt_(const ForNode *op) override</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BlockNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1BlockNode.html">tvm::tir::BlockNode</a></div><div class="ttdoc">A block is a basic schedule unit in TIR. </div><div class="ttdef"><b>Definition:</b> stmt.h:1241</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1MaxNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1MaxNode.html">tvm::tir::MaxNode</a></div><div class="ttdoc">max(a, b) </div><div class="ttdef"><b>Definition:</b> expr.h:299</div></div>
-<div class="ttc" id="classtvm_1_1tir_1_1IndexDataTypeRewriter_html_a34ba7e2f7b8a676f84a8fcb37d3c5dba"><div class="ttname"><a href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a34ba7e2f7b8a676f84a8fcb37d3c5dba">tvm::tir::IndexDataTypeRewriter::var_remap_</a></div><div class="ttdeci">Map&lt; Var, Var &gt; var_remap_</div><div class="ttdef"><b>Definition:</b> data_type_rewriter.h:126</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Buffer_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Buffer.html">tvm::tir::Buffer</a></div><div class="ttdoc">Buffer is a symbolic n-darray structure. It is a composition of primitive symbolic types...</div><div class="ttdef"><b>Definition:</b> buffer.h:160</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1StmtExprMutator_html"><div class="ttname"><a href="classtvm_1_1tir_1_1StmtExprMutator.html">tvm::tir::StmtExprMutator</a></div><div class="ttdoc">Mutator that recursively mutates stmts and exprs on them. </div><div class="ttdef"><b>Definition:</b> stmt_functor.h:314</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BufferStoreNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1BufferStoreNode.html">tvm::tir::BufferStoreNode</a></div><div class="ttdoc">Store value to the high dimension buffer. </div><div class="ttdef"><b>Definition:</b> stmt.h:290</div></div>
@@ -106,17 +104,20 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1ForNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1ForNode.html">tvm::tir::ForNode</a></div><div class="ttdoc">A for loop, with poissible type annotations. </div><div class="ttdef"><b>Definition:</b> stmt.h:950</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BufferLoadNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1BufferLoadNode.html">tvm::tir::BufferLoadNode</a></div><div class="ttdoc">Load value from the high dimension buffer. </div><div class="ttdef"><b>Definition:</b> expr.h:627</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ModNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1ModNode.html">tvm::tir::ModNode</a></div><div class="ttdoc">a % b in the C semnatics. </div><div class="ttdef"><b>Definition:</b> expr.h:231</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1LetStmtNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1LetStmtNode.html">tvm::tir::LetStmtNode</a></div><div class="ttdoc">Let binding, bind var to value, then run body. </div><div class="ttdef"><b>Definition:</b> stmt.h:65</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1DataTypeLegalizer_html_a947d190184b5f3d1e406216909a2cf18"><div class="ttname"><a href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a947d190184b5f3d1e406216909a2cf18">tvm::tir::DataTypeLegalizer::VisitExpr_</a></div><div class="ttdeci">PrimExpr VisitExpr_(const VarNode *op) override</div></div>
 <div class="ttc" id="classtvm_1_1PrimExpr_html"><div class="ttname"><a href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a></div><div class="ttdoc">Reference to PrimExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:112</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1FloorDivNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1FloorDivNode.html">tvm::tir::FloorDivNode</a></div><div class="ttdoc">Floor division, floor(a/b) </div><div class="ttdef"><b>Definition:</b> expr.h:248</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1SubNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1SubNode.html">tvm::tir::SubNode</a></div><div class="ttdoc">a - b </div><div class="ttdef"><b>Definition:</b> expr.h:174</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1CallNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1CallNode.html">tvm::tir::CallNode</a></div><div class="ttdoc">Call node. </div><div class="ttdef"><b>Definition:</b> expr.h:936</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1SelectNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1SelectNode.html">tvm::tir::SelectNode</a></div><div class="ttdoc">return true_value if condition is true, otherwise return false_value. </div><div class="ttdef"><b>Definition:</b> expr.h:572</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1DivNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1DivNode.html">tvm::tir::DivNode</a></div><div class="ttdoc">a / b in the C semnatics. </div><div class="ttdef"><b>Definition:</b> expr.h:211</div></div>
-<div class="ttc" id="classtvm_1_1tir_1_1DataTypeLegalizer_html_a4b60203572648ecb12a2aa72a552318d"><div class="ttname"><a href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a4b60203572648ecb12a2aa72a552318d">tvm::tir::DataTypeLegalizer::ivmap_</a></div><div class="ttdeci">std::unordered_map&lt; const IterVarNode *, IterVar &gt; ivmap_</div><div class="ttdef"><b>Definition:</b> data_type_rewriter.h:81</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1DataTypeLegalizer_html_a4b60203572648ecb12a2aa72a552318d"><div class="ttname"><a href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a4b60203572648ecb12a2aa72a552318d">tvm::tir::DataTypeLegalizer::ivmap_</a></div><div class="ttdeci">std::unordered_map&lt; const IterVarNode *, IterVar &gt; ivmap_</div><div class="ttdef"><b>Definition:</b> data_type_rewriter.h:83</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ExprMutator_html_af6e722273818f70d0894aeecd1b55615"><div class="ttname"><a href="classtvm_1_1tir_1_1ExprMutator.html#af6e722273818f70d0894aeecd1b55615">tvm::tir::ExprMutator::VisitExpr_</a></div><div class="ttdeci">PrimExpr VisitExpr_(const VarNode *op) override</div></div>
 <div class="ttc" id="structtvm_1_1tir_1_1LENode_html"><div class="ttname"><a href="structtvm_1_1tir_1_1LENode.html">tvm::tir::LENode</a></div><div class="ttdoc">a &lt;= b </div><div class="ttdef"><b>Definition:</b> expr.h:399</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1DataTypeLegalizer_html_a1de0d50699d7d15618b19d2cb7fadb35"><div class="ttname"><a href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1de0d50699d7d15618b19d2cb7fadb35">tvm::tir::DataTypeLegalizer::var_remap_</a></div><div class="ttdeci">std::unordered_map&lt; const VarNode *, Var &gt; var_remap_</div><div class="ttdef"><b>Definition:</b> data_type_rewriter.h:85</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1NENode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1NENode.html">tvm::tir::NENode</a></div><div class="ttdoc">a != b </div><div class="ttdef"><b>Definition:</b> expr.h:365</div></div>
-<div class="ttc" id="classtvm_1_1tir_1_1IndexDataTypeRewriter_html_a8ee3de0fbd707b0062c32c0511d53904"><div class="ttname"><a href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a8ee3de0fbd707b0062c32c0511d53904">tvm::tir::IndexDataTypeRewriter::buffer_remap_</a></div><div class="ttdeci">Map&lt; Buffer, Buffer &gt; buffer_remap_</div><div class="ttdef"><b>Definition:</b> data_type_rewriter.h:127</div></div>
+<div class="ttc" id="classtvm_1_1tir_1_1IndexDataTypeRewriter_html_a8ee3de0fbd707b0062c32c0511d53904"><div class="ttname"><a href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a8ee3de0fbd707b0062c32c0511d53904">tvm::tir::IndexDataTypeRewriter::buffer_remap_</a></div><div class="ttdeci">Map&lt; Buffer, Buffer &gt; buffer_remap_</div><div class="ttdef"><b>Definition:</b> data_type_rewriter.h:130</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_ab45f13dd70d982d9f977c79b6f7fac98"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#ab45f13dd70d982d9f977c79b6f7fac98">tvm::runtime::DataType::Int</a></div><div class="ttdeci">static DataType Int(int bits, int lanes=1)</div><div class="ttdoc">Construct an int type. </div><div class="ttdef"><b>Definition:</b> data_type.h:164</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1GENode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1GENode.html">tvm::tir::GENode</a></div><div class="ttdoc">a &gt;= b </div><div class="ttdef"><b>Definition:</b> expr.h:433</div></div>
 </div><!-- fragment --></div><!-- contents -->
diff --git a/docs/reference/api/doxygen/functions_func_v.html b/docs/reference/api/doxygen/functions_func_v.html
index 64c132e3a4..414dab46a7 100644
--- a/docs/reference/api/doxygen/functions_func_v.html
+++ b/docs/reference/api/doxygen/functions_func_v.html
@@ -445,12 +445,12 @@ $(function() {
 , <a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html#a8b1ef43d965026767385fb3ee5791928">tvm::relay::ExprVisitor</a>
 , <a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html#a86656f533b4961437f53d1dbe30ae1fb">tvm::relay::MixedModeMutator</a>
 , <a class="el" href="classtvm_1_1relay_1_1MixedModeVisitor.html#a98c7381c097a64d67f47dd512d99f045">tvm::relay::MixedModeVisitor</a>
-, <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1503fb6579a078beadfccbe1eba1b717">tvm::tir::DataTypeLegalizer</a>
-, <a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#aaf87a9c9650901e8e88250821c76725c">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a>
-, <a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#afa46554fba071aae744164711b6112ae">tvm::tir::ExprMutator</a>
-, <a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html#a893148a9d3d5fa9b76ce653f04d19140">tvm::tir::ExprVisitor</a>
+, <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#ad56676b279c7602022e9515564ab746d">tvm::tir::DataTypeLegalizer</a>
+, <a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#a46dcaa82daff7c3d2365b69174b0635f">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a>
+, <a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a26ea1a3a8efcf9319349533ba3e69456">tvm::tir::ExprMutator</a>
+, <a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html#a375334641aa93fdc9354a0e0dc636284">tvm::tir::ExprVisitor</a>
 , <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html#a305f82080e85e826fc70b30c635bb945">tvm::tir::IndexDataTypeNormalizer</a>
-, <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a20c9bd949c70b95f12c1dff43cee1174">tvm::tir::IndexDataTypeRewriter</a>
+, <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a006af1f277912d7fdca575e92ac33d1a">tvm::tir::IndexDataTypeRewriter</a>
 </li>
 <li>VisitExprDefault_()
 : <a class="el" href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html#ab35a37c57578e32a8c873cdfe9e31a0f">tvm::relay::ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;</a>
@@ -477,9 +477,9 @@ $(function() {
 , <a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html#ad6692c86b749bb0d93042aa2a0425a74">tvm::relay::PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a>
 </li>
 <li>VisitPattern_()
-: <a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html#a11370205d1de851e817d40f031ad4811">tvm::relay::PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a>
-, <a class="el" href="classtvm_1_1relay_1_1PatternMutator.html#a45f7cdfa9d72a3ab0ce2cb4ea04fec5b">tvm::relay::PatternMutator</a>
-, <a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html#a2d9a35bc9be4f5d0badb0c1bb5b86847">tvm::relay::PatternVisitor</a>
+: <a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html#afe53bd4de34ab8dda2ea3c46a91ea6a8">tvm::relay::PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a>
+, <a class="el" href="classtvm_1_1relay_1_1PatternMutator.html#aedeb370baf4bca6018153d01d2594a84">tvm::relay::PatternMutator</a>
+, <a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html#ad5ed2a5c3b88ec027df9e4269dff4b80">tvm::relay::PatternVisitor</a>
 </li>
 <li>VisitPatternDefault_()
 : <a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html#ad71efcd0b9a937b35f7fd4e2b6131773">tvm::relay::PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a>
@@ -495,11 +495,11 @@ $(function() {
 , <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a4306d1beba05fa1ac582503498a6d7ce">tvm::tir::StmtMutator</a>
 </li>
 <li>VisitStmt_()
-: <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#acab424302ee742c9759da1696519496a">tvm::tir::DataTypeLegalizer</a>
-, <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#aa6411333a5796b7056a674cc03173d71">tvm::tir::IndexDataTypeRewriter</a>
-, <a class="el" href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8_01args_08_4.html#a980c3e5119d1898c9c9a3f5003428aef">tvm::tir::StmtFunctor&lt; R(const Stmt &amp;n, Args... args)&gt;</a>
-, <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#aaf5a2292da207073d637cc4d1a0704ce">tvm::tir::StmtMutator</a>
-, <a class="el" href="classtvm_1_1tir_1_1StmtVisitor.html#aff2335e1aea1de67bdfb92271c8c0e10">tvm::tir::StmtVisitor</a>
+: <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#aca20c14f1f058ffb3ef6c8bba1fda6f1">tvm::tir::DataTypeLegalizer</a>
+, <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a0195f5def23df2aafef01255243390f3">tvm::tir::IndexDataTypeRewriter</a>
+, <a class="el" href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8_01args_08_4.html#ad6e1f7239b02db329fb013fadd3f7eaf">tvm::tir::StmtFunctor&lt; R(const Stmt &amp;n, Args... args)&gt;</a>
+, <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a7bc8fad1381862ce012819bd4762a1b6">tvm::tir::StmtMutator</a>
+, <a class="el" href="classtvm_1_1tir_1_1StmtVisitor.html#ae842e15f24937f456d0e42aeb53965df">tvm::tir::StmtVisitor</a>
 </li>
 <li>VisitStmtDefault_()
 : <a class="el" href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8_01args_08_4.html#ae51b328e2b59a50bed7112a93dba1aae">tvm::tir::StmtFunctor&lt; R(const Stmt &amp;n, Args... args)&gt;</a>
@@ -514,7 +514,7 @@ $(function() {
 </li>
 <li>VisitType_()
 : <a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html#ae03e50b74b8bd7551b55205976be592e">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a>
-, <a class="el" href="classtvm_1_1TypeMutator.html#a89bd7a76f5a736defc7f3b0dda664761">tvm::TypeMutator</a>
+, <a class="el" href="classtvm_1_1TypeMutator.html#a0d7ff530827c63fb3eb18cb720305dca">tvm::TypeMutator</a>
 , <a class="el" href="classtvm_1_1TypeVisitor.html#ae699be9a6ed94a635c315506e0c2a6d2">tvm::TypeVisitor</a>
 </li>
 <li>VisitTypeDefault_()
@@ -534,7 +534,7 @@ $(function() {
 : <a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFrame.html#a8f8c990ee4fa7cb7472f5440f2ca3bde">tvm::runtime::vm::VMFrame</a>
 </li>
 <li>VMFunction()
-: <a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFunction.html#af9d2bdcf19642c21bc4909b9e9b6196d">tvm::runtime::vm::VMFunction</a>
+: <a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFunction.html#aea763069fe1dd6849ce0d1ec336931e0">tvm::runtime::vm::VMFunction</a>
 </li>
 <li>Void()
 : <a class="el" href="classtvm_1_1runtime_1_1DataType.html#ab8dc0832aff8fd7421884c0fe20a3bfd">tvm::runtime::DataType</a>
diff --git a/docs/reference/api/doxygen/functions_v.html b/docs/reference/api/doxygen/functions_v.html
index 2a532fc396..6a9d2cd800 100644
--- a/docs/reference/api/doxygen/functions_v.html
+++ b/docs/reference/api/doxygen/functions_v.html
@@ -177,7 +177,7 @@ $(function() {
 : <a class="el" href="classtvm_1_1tir_1_1Var.html#a21ba7568a83bfc2a5896f9e0ff181129">tvm::tir::Var</a>
 </li>
 <li>var_remap_
-: <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a34ba7e2f7b8a676f84a8fcb37d3c5dba">tvm::tir::IndexDataTypeRewriter</a>
+: <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1de0d50699d7d15618b19d2cb7fadb35">tvm::tir::DataTypeLegalizer</a>
 </li>
 <li>VarDefFrame()
 : <a class="el" href="classtvm_1_1script_1_1printer_1_1VarDefFrame.html#a8094608662f9111d64ca3eca3b11673a">tvm::script::printer::VarDefFrame</a>
@@ -578,7 +578,7 @@ $(function() {
 , <a class="el" href="classtvm_1_1relay_1_1DFPatternVisitor.html#ae7e67d3a1709b0a180572417698ffaa8">tvm::relay::DFPatternVisitor</a>
 </li>
 <li>VisitDFPattern_()
-: <a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor_3_01R_07const_01DFPattern_01_6n_00_01Args_8_8_8_08_4.html#a7e2f9943f178774d1a54b52f92933a15">tvm::relay::DFPatternFunctor&lt; R(const DFPattern &amp;n, Args...)&gt;</a>
+: <a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor_3_01R_07const_01DFPattern_01_6n_00_01Args_8_8_8_08_4.html#a677d20c3c1f3c9262823b017bb5e1eb6">tvm::relay::DFPatternFunctor&lt; R(const DFPattern &amp;n, Args...)&gt;</a>
 , <a class="el" href="classtvm_1_1relay_1_1DFPatternVisitor.html#af6cb65b48220b7f937c751f9bfc18e91">tvm::relay::DFPatternVisitor</a>
 </li>
 <li>VisitDFPatternDefault_()
@@ -601,16 +601,16 @@ $(function() {
 </li>
 <li>VisitExpr_()
 : <a class="el" href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html#af9dc3ff4e078e2c9100db1f9ee6374f8">tvm::relay::ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;</a>
-, <a class="el" href="classtvm_1_1relay_1_1ExprMutator.html#affe6bbbc8d394c8a0f4c459c6ae09e8f">tvm::relay::ExprMutator</a>
-, <a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html#a72b3aeee22ea51f39c36d028d109cddc">tvm::relay::ExprVisitor</a>
+, <a class="el" href="classtvm_1_1relay_1_1ExprMutator.html#ac22fb8d0001bb57b4dd4a796f32d4517">tvm::relay::ExprMutator</a>
+, <a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html#ab35bcfc19e95fbf444317fae7268f2fd">tvm::relay::ExprVisitor</a>
 , <a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html#a86656f533b4961437f53d1dbe30ae1fb">tvm::relay::MixedModeMutator</a>
 , <a class="el" href="classtvm_1_1relay_1_1MixedModeVisitor.html#a98c7381c097a64d67f47dd512d99f045">tvm::relay::MixedModeVisitor</a>
-, <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#ac1e09304dc701e922244e1a4587be114">tvm::tir::DataTypeLegalizer</a>
-, <a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#aa57d47126caef2a706163d086ec76a12">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a>
-, <a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#af6e722273818f70d0894aeecd1b55615">tvm::tir::ExprMutator</a>
-, <a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html#a80845900e297ba27e9700b4e8b827118">tvm::tir::ExprVisitor</a>
-, <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html#a305f82080e85e826fc70b30c635bb945">tvm::tir::IndexDataTypeNormalizer</a>
-, <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#aae9177887db863333120ffe4aa8fe64f">tvm::tir::IndexDataTypeRewriter</a>
+, <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a5bad1d0823b0e51fa7c5dbef34f49833">tvm::tir::DataTypeLegalizer</a>
+, <a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#a1cf9ab2f51fc07d27d9d390796f37b41">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a>
+, <a class="el" href="classtvm_1_1tir_1_1ExprMutator.html#a44047f3394527b92a7b9b2c09c3d1383">tvm::tir::ExprMutator</a>
+, <a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html#abf1ea11bdeb9df050bc73155ffb50a8a">tvm::tir::ExprVisitor</a>
+, <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeNormalizer.html#a83924f4f1bd318d388b492c6d6e5cd6b">tvm::tir::IndexDataTypeNormalizer</a>
+, <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a9a63e5ef4cae7603732bda021380eb0c">tvm::tir::IndexDataTypeRewriter</a>
 </li>
 <li>VisitExprDefault_()
 : <a class="el" href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html#ab35a37c57578e32a8c873cdfe9e31a0f">tvm::relay::ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;</a>
@@ -637,9 +637,9 @@ $(function() {
 , <a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html#ad6692c86b749bb0d93042aa2a0425a74">tvm::relay::PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a>
 </li>
 <li>VisitPattern_()
-: <a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html#aa1bf3196b98aedddc028f14f5dcf5384">tvm::relay::PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a>
-, <a class="el" href="classtvm_1_1relay_1_1PatternMutator.html#af8ea941a20a51cba2dc5e9e21f0ffc88">tvm::relay::PatternMutator</a>
-, <a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html#ad5ed2a5c3b88ec027df9e4269dff4b80">tvm::relay::PatternVisitor</a>
+: <a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html#a11370205d1de851e817d40f031ad4811">tvm::relay::PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a>
+, <a class="el" href="classtvm_1_1relay_1_1PatternMutator.html#a5c4cdc5bd1b1929edf9afa3cf85b9857">tvm::relay::PatternMutator</a>
+, <a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html#a615c586aebfe563c7dfee3ff99e8ecb5">tvm::relay::PatternVisitor</a>
 </li>
 <li>VisitPatternDefault_()
 : <a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html#ad71efcd0b9a937b35f7fd4e2b6131773">tvm::relay::PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a>
@@ -655,11 +655,11 @@ $(function() {
 , <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#a4306d1beba05fa1ac582503498a6d7ce">tvm::tir::StmtMutator</a>
 </li>
 <li>VisitStmt_()
-: <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#abd51f9d25b5e2419034f028b17aded2a">tvm::tir::DataTypeLegalizer</a>
+: <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a87e951cbdf97e52218f21db3fbbfaf38">tvm::tir::DataTypeLegalizer</a>
 , <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#aeefb6cdc6b0619833d03167d8fd8e4c8">tvm::tir::IndexDataTypeRewriter</a>
-, <a class="el" href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8_01args_08_4.html#afb4abf8cb69c4a9105eb38e262e96bc7">tvm::tir::StmtFunctor&lt; R(const Stmt &amp;n, Args... args)&gt;</a>
-, <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#aecd16bf1a6715ea36f6c30e5dc2ceae7">tvm::tir::StmtMutator</a>
-, <a class="el" href="classtvm_1_1tir_1_1StmtVisitor.html#ae842e15f24937f456d0e42aeb53965df">tvm::tir::StmtVisitor</a>
+, <a class="el" href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8_01args_08_4.html#a5d6146f080b95f73d41be3a2af34c934">tvm::tir::StmtFunctor&lt; R(const Stmt &amp;n, Args... args)&gt;</a>
+, <a class="el" href="classtvm_1_1tir_1_1StmtMutator.html#ab094e88d4bda5882756d136c15db6260">tvm::tir::StmtMutator</a>
+, <a class="el" href="classtvm_1_1tir_1_1StmtVisitor.html#a54d994b1b7deb653e908ff5e59bb691e">tvm::tir::StmtVisitor</a>
 </li>
 <li>VisitStmtDefault_()
 : <a class="el" href="classtvm_1_1tir_1_1StmtFunctor_3_01R_07const_01Stmt_01_6n_00_01Args_8_8_8_01args_08_4.html#ae51b328e2b59a50bed7112a93dba1aae">tvm::tir::StmtFunctor&lt; R(const Stmt &amp;n, Args... args)&gt;</a>
@@ -673,9 +673,9 @@ $(function() {
 , <a class="el" href="classtvm_1_1TypeMutator.html#a84e824911927d98e20a338eab8b75a45">tvm::TypeMutator</a>
 </li>
 <li>VisitType_()
-: <a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html#ae3a258acfcf5fe3ef0c7e291908d72ff">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a>
-, <a class="el" href="classtvm_1_1TypeMutator.html#a9c2d7e1a52faac66f55896ecde3f2211">tvm::TypeMutator</a>
-, <a class="el" href="classtvm_1_1TypeVisitor.html#a8f548b8def48ea4f11a3eafa04d74d96">tvm::TypeVisitor</a>
+: <a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html#a9f728a703d68743e8b305acb68e5eb87">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a>
+, <a class="el" href="classtvm_1_1TypeMutator.html#a4c7667d35d0a9a28c957165b65536c93">tvm::TypeMutator</a>
+, <a class="el" href="classtvm_1_1TypeVisitor.html#ac8845fbf58c1a1f0ebc23c7ee403aaab">tvm::TypeVisitor</a>
 </li>
 <li>VisitTypeDefault_()
 : <a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html#a91553f9e04c39b3821a70ae4f7b0c597">tvm::TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a>
@@ -697,7 +697,7 @@ $(function() {
 : <a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFrame.html#a8f8c990ee4fa7cb7472f5440f2ca3bde">tvm::runtime::vm::VMFrame</a>
 </li>
 <li>VMFunction()
-: <a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFunction.html#aea763069fe1dd6849ce0d1ec336931e0">tvm::runtime::vm::VMFunction</a>
+: <a class="el" href="structtvm_1_1runtime_1_1vm_1_1VMFunction.html#af9d2bdcf19642c21bc4909b9e9b6196d">tvm::runtime::vm::VMFunction</a>
 </li>
 <li>Void()
 : <a class="el" href="classtvm_1_1runtime_1_1DataType.html#ab8dc0832aff8fd7421884c0fe20a3bfd">tvm::runtime::DataType</a>
diff --git a/docs/reference/api/doxygen/functions_vars_v.html b/docs/reference/api/doxygen/functions_vars_v.html
index dc03a62884..5d4389f3a7 100644
--- a/docs/reference/api/doxygen/functions_vars_v.html
+++ b/docs/reference/api/doxygen/functions_vars_v.html
@@ -146,7 +146,7 @@ $(function() {
 , <a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html#ae701bd89e0cafb77ae5f9eec127d1fe8">tvm::tir::LetStmtNode</a>
 </li>
 <li>var_remap_
-: <a class="el" href="classtvm_1_1tir_1_1IndexDataTypeRewriter.html#a34ba7e2f7b8a676f84a8fcb37d3c5dba">tvm::tir::IndexDataTypeRewriter</a>
+: <a class="el" href="classtvm_1_1tir_1_1DataTypeLegalizer.html#a1de0d50699d7d15618b19d2cb7fadb35">tvm::tir::DataTypeLegalizer</a>
 </li>
 <li>variables
 : <a class="el" href="classtvm_1_1arith_1_1IntConstraintsNode.html#adecd62b78ba2a3fc57778088ff641cf6">tvm::arith::IntConstraintsNode</a>
diff --git a/docs/reference/api/doxygen/hierarchy.html b/docs/reference/api/doxygen/hierarchy.html
index 6f9862731b..cce5684584 100644
--- a/docs/reference/api/doxygen/hierarchy.html
+++ b/docs/reference/api/doxygen/hierarchy.html
@@ -153,9 +153,9 @@ This inheritance list is sorted roughly, but not completely, alphabetically:</di
 <tr id="row_62_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1Framer.html" target="_self">tvm::runtime::micro_rpc::Framer</a></td><td class="desc"></td></tr>
 <tr id="row_63_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classGlobalVar.html" target="_self">GlobalVar</a></td><td class="desc"></td></tr>
 <tr id="row_64_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1usmp_1_1algo_1_1GreedyBase.html" target="_self">tvm::tir::usmp::algo::GreedyBase</a></td><td class="desc">This is the base class for Greedy Algorithms where the sorting is specialized in the extended classes based on the greedy criteria </td></tr>
-<tr id="row_65_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html" target="_self">tvm::runtime::SimpleObjAllocator::Handler&lt; T &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_66_" class="even"><td class="entry"><span style="width:0px;display:inline-block;">&#160;</span><span id="arr_66_" class="arrow" onclick="toggleFolder('66_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html" target="_self">tvm::SEqualReducer::Handler</a></td><td class="desc">Internal handler that defines custom behaviors. </td></tr>
-<tr id="row_66_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualHandlerDefault.html" target="_self">tvm::SEqualHandlerDefault</a></td><td class="desc">The default handler for equality testing </td></tr>
+<tr id="row_65_"><td class="entry"><span style="width:0px;display:inline-block;">&#160;</span><span id="arr_65_" class="arrow" onclick="toggleFolder('65_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html" target="_self">tvm::SEqualReducer::Handler</a></td><td class="desc">Internal handler that defines custom behaviors. </td></tr>
+<tr id="row_65_0_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualHandlerDefault.html" target="_self">tvm::SEqualHandlerDefault</a></td><td class="desc">The default handler for equality testing </td></tr>
+<tr id="row_66_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html" target="_self">tvm::runtime::SimpleObjAllocator::Handler&lt; T &gt;</a></td><td class="desc"></td></tr>
 <tr id="row_67_"><td class="entry"><span style="width:0px;display:inline-block;">&#160;</span><span id="arr_67_" class="arrow" onclick="toggleFolder('67_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html" target="_self">tvm::SHashReducer::Handler</a></td><td class="desc">Internal handler that defines custom behaviors </td></tr>
 <tr id="row_67_0_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashHandlerDefault.html" target="_self">tvm::SHashHandlerDefault</a></td><td class="desc">The default handler for hash key computation </td></tr>
 <tr id="row_68_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDataType_01_4.html" target="_self">dmlc::serializer::Handler&lt; DLDataType &gt;</a></td><td class="desc"></td></tr>
@@ -991,409 +991,408 @@ This inheritance list is sorted roughly, but not completely, alphabetically:</di
 <tr id="row_112_82_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Map.html" target="_self">tvm::runtime::Map&lt; tvm::tir::Var, tvm::Range &gt;</a></td><td class="desc"></td></tr>
 <tr id="row_112_83_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Map.html" target="_self">tvm::runtime::Map&lt; tvm::tir::Var, tvm::tir::Buffer &gt;</a></td><td class="desc"></td></tr>
 <tr id="row_112_84_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Map.html" target="_self">tvm::runtime::Map&lt; tvm::tir::Var, tvm::tir::IterVar &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_85_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Map.html" target="_self">tvm::runtime::Map&lt; tvm::tir::Var, tvm::tir::Var &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_86_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::DiagnosticContext &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_87_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::FloatImm &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_88_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::Integer &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_89_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::IRModule &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_90_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::meta_schedule::CostModel &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_91_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::meta_schedule::Database &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_92_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::meta_schedule::SearchStrategy &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_93_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::meta_schedule::SpaceGenerator &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_94_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::PrimExpr &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_95_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::auto_scheduler::MeasureCallback &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_96_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::FloatImm &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_97_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::Integer &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_98_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::ArgInfo &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_99_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::BuilderResult &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_100_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::MeasureCandidate &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_101_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::Postproc &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_102_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::RunnerFuture &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_103_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::ScheduleRule &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_104_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::tir::BufferRegion &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_105_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::tir::Stmt &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_106_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Map&lt; tvm::meta_schedule::Mutator, tvm::FloatImm &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_107_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Map&lt; tvm::runtime::String, tvm::runtime::NDArray &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_108_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Map&lt; tvm::runtime::String, tvm::runtime::ObjectRef &gt; &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_109_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::NDArray &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_110_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::ObjectRef &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_111_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::String &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_112_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::script::printer::ExprDoc &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_113_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::Target &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_114_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::tir::IterVar &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_115_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::tir::Stmt &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_116_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::Type &gt;</a></td><td class="desc"></td></tr>
-<tr id="row_112_117_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_117_" class="arrow" onclick="toggleFolder('112_117_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1AffineType.html" target="_self">tvm::AffineType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1AffineTypeNode.html" title="AffineType representation. ">AffineTypeNode [...]
-<tr id="row_112_117_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorAffineType.html" target="_self">tvm::TensorAffineType</a></td><td class="desc">Managed reference to AffineTypes </td></tr>
-<tr id="row_112_117_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleAffineType.html" target="_self">tvm::TupleAffineType</a></td><td class="desc">Managed reference to TupleAffineTypes </td></tr>
-<tr id="row_112_118_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1ConstIntBound.html" target="_self">tvm::arith::ConstIntBound</a></td><td class="desc">Reference class to <a class="el" href="classtvm_1_1arith_1_1ConstIntBoundNode.html" title="Constant integer up and lower bound(inclusive). Useful for value bound analysis. ">ConstIntBoundNo [...]
-<tr id="row_112_119_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IntConstraints.html" target="_self">tvm::arith::IntConstraints</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IntConstraintsNode.html" title="Represent integer constrains including (integer) variables, their ranges and the relations be [...]
-<tr id="row_112_120_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransform.html" target="_self">tvm::arith::IntConstraintsTransform</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransformNode.html" title="We can have different set of variables to represent the same const [...]
-<tr id="row_112_121_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IntGroupBounds.html" target="_self">tvm::arith::IntGroupBounds</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IntGroupBoundsNode.html" title="Represent integer grouped bounds which are classified into lower bounds (inclusive), upper bo [...]
-<tr id="row_112_122_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IntSet.html" target="_self">tvm::arith::IntSet</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IntSetNode.html" title="Base class of all Integer set containers. represent a set of integers in one dimension. ">IntSetNode</a> </td></tr>
-<tr id="row_112_123_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IterMapResult.html" target="_self">tvm::arith::IterMapResult</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IterMapResultNode.html" title="Result of DetectIterMap. ">IterMapResultNode</a> </td></tr>
-<tr id="row_112_124_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IterMark.html" target="_self">tvm::arith::IterMark</a></td><td class="desc">Managed reference to IterMarkExprNode </td></tr>
-<tr id="row_112_125_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1ModularSet.html" target="_self">tvm::arith::ModularSet</a></td><td class="desc">Reference of <a class="el" href="classtvm_1_1arith_1_1ModularSetNode.html" title="Range of a linear integer function. Use to do specify the possible index values. ">ModularSetNode</a> </td></tr>
-<tr id="row_112_126_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1AttrFieldInfo.html" target="_self">tvm::AttrFieldInfo</a></td><td class="desc"><a class="el" href="classtvm_1_1AttrFieldInfo.html" title="AttrFieldInfo. ">AttrFieldInfo</a> </td></tr>
-<tr id="row_112_127_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_127_" class="arrow" onclick="toggleFolder('112_127_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Attrs.html" target="_self">tvm::Attrs</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1BaseAttrsNode.html" title="Base class of all attribute class. ">BaseAttrsNode</a> [...]
-<tr id="row_112_127_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DictAttrs.html" target="_self">tvm::DictAttrs</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1DictAttrsNode.html" title="Specialized attribute type that is backed by a map. The DictAttrsNode implements the Attrs behavior...">DictAttrsNode</a> </td></tr>
-<tr id="row_112_128_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1AccessAnalyzer.html" target="_self">tvm::auto_scheduler::AccessAnalyzer</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1AccessAnalyzerNode.html" title="Static analyzer for a ComputeDAG. ">AccessAnalyzerNode</a> </td></tr>
-<tr id="row_112_129_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMap.html" target="_self">tvm::auto_scheduler::AttachMap</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMapNode.html" title="stores the compute_at relation between stages This stores a bi-directional mapp [...]
-<tr id="row_112_130_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1BuildResult.html" target="_self">tvm::auto_scheduler::BuildResult</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1BuildResultNode.html" title="Store the result of a build. ">BuildResultNode</a> </td></tr>
-<tr id="row_112_131_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAG.html" target="_self">tvm::auto_scheduler::ComputeDAG</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAGNode.html" title="The auto-scheduler&#39;s computational graph and related program analyses. " [...]
-<tr id="row_112_132_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_132_" class="arrow" onclick="toggleFolder('112_132_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1CostModel.html" target="_self">tvm::auto_scheduler::CostModel</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1CostModelNode.html" [...]
-<tr id="row_112_132_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1PythonBasedModel.html" target="_self">tvm::auto_scheduler::PythonBasedModel</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1PythonBasedModelNode.html" title="A wrapper for cost model defined by python code This cla [...]
-<tr id="row_112_132_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1RandomModel.html" target="_self">tvm::auto_scheduler::RandomModel</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1RandomModelNode.html" title="The cost model returning random value for all predictions. ">RandomMode [...]
-<tr id="row_112_133_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1HardwareParams.html" target="_self">tvm::auto_scheduler::HardwareParams</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1HardwareParamsNode.html" title="The parameters of target hardware used to guide the SearchPolicy [...]
-<tr id="row_112_134_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1Iterator.html" target="_self">tvm::auto_scheduler::Iterator</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1IteratorNode.html" title="An iterator of a for-loop Similar to tvm::IterVar in include/tvm/tir/expr.h ">Iter [...]
-<tr id="row_112_135_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_135_" class="arrow" onclick="toggleFolder('112_135_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureCallback.html" target="_self">tvm::auto_scheduler::MeasureCallback</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1Measure [...]
-<tr id="row_112_135_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1PythonBasedMeasureCallback.html" target="_self">tvm::auto_scheduler::PythonBasedMeasureCallback</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1PythonBasedMeasureCallbackNode.html" title="A wrapper for measure call [...]
-<tr id="row_112_135_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1RecordToFile.html" target="_self">tvm::auto_scheduler::RecordToFile</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1RecordToFileNode.html" title="Callback for logging the input and results of measurements to file.  [...]
-<tr id="row_112_136_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureInput.html" target="_self">tvm::auto_scheduler::MeasureInput</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureInputNode.html" title="Store the input of a measurement. ">MeasureInputNode</a> </td></tr>
-<tr id="row_112_137_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureResult.html" target="_self">tvm::auto_scheduler::MeasureResult</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureResultNode.html" title="Store the results of a measurement. ">MeasureResultNode</a> </td></tr>
-<tr id="row_112_138_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_138_" class="arrow" onclick="toggleFolder('112_138_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramBuilder.html" target="_self">tvm::auto_scheduler::ProgramBuilder</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramBu [...]
-<tr id="row_112_138_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilder.html" target="_self">tvm::auto_scheduler::LocalBuilder</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilderNode.html" title="LocalBuilder use local CPU cores to build programs in parallel. ">Lo [...]
-<tr id="row_112_139_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramMeasurer.html" target="_self">tvm::auto_scheduler::ProgramMeasurer</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramMeasurerNode.html" title="Measurer that measures the time costs of tvm programs This cl [...]
-<tr id="row_112_140_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_140_" class="arrow" onclick="toggleFolder('112_140_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramRunner.html" target="_self">tvm::auto_scheduler::ProgramRunner</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramRunn [...]
-<tr id="row_112_140_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalRunner.html" target="_self">tvm::auto_scheduler::LocalRunner</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1LocalRunnerNode.html" title="LocalRunner that uses local CPU/GPU to measure the time cost of program [...]
-<tr id="row_112_140_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1RPCRunner.html" target="_self">tvm::auto_scheduler::RPCRunner</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1RPCRunnerNode.html" title="RPCRunner that uses RPC call to measures the time cost of programs on remote  [...]
-<tr id="row_112_141_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1RecordReader.html" target="_self">tvm::auto_scheduler::RecordReader</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1RecordReaderNode.html" title="Log reader to load step logs from a file. ">RecordReaderNode</a> </td></tr>
-<tr id="row_112_142_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_142_" class="arrow" onclick="toggleFolder('112_142_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchCallback.html" target="_self">tvm::auto_scheduler::SearchCallback</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1SearchCal [...]
-<tr id="row_112_142_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1PreloadMeasuredStates.html" target="_self">tvm::auto_scheduler::PreloadMeasuredStates</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1PreloadMeasuredStatesNode.html" title="Preload measured states from a log file.  [...]
-<tr id="row_112_143_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchPolicy.html" target="_self">tvm::auto_scheduler::SearchPolicy</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1SearchPolicyNode.html" title="The base class of search policies. ">SearchPolicyNode</a> </td></tr>
-<tr id="row_112_144_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchTask.html" target="_self">tvm::auto_scheduler::SearchTask</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1SearchTaskNode.html" title="The computation information and hardware parameters for a specific schedule  [...]
-<tr id="row_112_145_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1Stage.html" target="_self">tvm::auto_scheduler::Stage</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1StageNode.html" title="A op stage in the compute declaration. Similar to te::Stage in include/tvm/te/schedule.h. " [...]
-<tr id="row_112_146_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1State.html" target="_self">tvm::auto_scheduler::State</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1StateNode.html" title="A state in the search process. It consists of the current loop structure and a list of tran [...]
-<tr id="row_112_147_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_147_" class="arrow" onclick="toggleFolder('112_147_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1Step.html" target="_self">tvm::auto_scheduler::Step</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1StepNode.html" title="The bas [...]
-<tr id="row_112_147_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1AnnotationStep.html" target="_self">tvm::auto_scheduler::AnnotationStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1AnnotationStepNode.html" title="Annotation step that corresponds to vectorize, parallel, unrol [...]
-<tr id="row_112_147_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheReadStep.html" target="_self">tvm::auto_scheduler::CacheReadStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1CacheReadStepNode.html" title="Cache read step that corresponds to te::Schedule::cache_read. ">C [...]
-<tr id="row_112_147_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheWriteStep.html" target="_self">tvm::auto_scheduler::CacheWriteStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1CacheWriteStepNode.html" title="Cache write step that corresponds to te::Schedule::cache_write [...]
-<tr id="row_112_147_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStep.html" target="_self">tvm::auto_scheduler::ComputeAtStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html" title="Compute at step that corresponds to te::Stage::compute_at. ">Comp [...]
-<tr id="row_112_147_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStep.html" target="_self">tvm::auto_scheduler::ComputeInlineStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStepNode.html" title="Compute inline step that corresponds to te::Stage::co [...]
-<tr id="row_112_147_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeRootStep.html" target="_self">tvm::auto_scheduler::ComputeRootStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeRootStepNode.html" title="Compute root step that corresponds to te::Stage::compute_ro [...]
-<tr id="row_112_147_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStep.html" target="_self">tvm::auto_scheduler::FollowFusedSplitStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStepNode.html" title="Similar to FollowSplitStep, but uses split f [...]
-<tr id="row_112_147_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowSplitStep.html" target="_self">tvm::auto_scheduler::FollowSplitStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1FollowSplitStepNode.html" title="Similar to SplitStepNode, but uses split factors from anoth [...]
-<tr id="row_112_147_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStep.html" target="_self">tvm::auto_scheduler::FuseStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStepNode.html" title="Fuse step that corresponds to te::Stage::fuse. ">FuseStepNode</a> </td></tr>
-<tr id="row_112_147_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1PragmaStep.html" target="_self">tvm::auto_scheduler::PragmaStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1PragmaStepNode.html" title="Pragma step that corresponds to te::Stage::pragma. ">PragmaStepNode</a> </ [...]
-<tr id="row_112_147_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ReorderStep.html" target="_self">tvm::auto_scheduler::ReorderStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ReorderStepNode.html" title="Reorder step that corresponds to te::Stage::reorder. ">ReorderStepNode [...]
-<tr id="row_112_147_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1RfactorStep.html" target="_self">tvm::auto_scheduler::RfactorStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1RfactorStepNode.html" title="Reduction factor step that corresponds to te::Schedule::rfactor. ">Rfa [...]
-<tr id="row_112_147_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1SplitStep.html" target="_self">tvm::auto_scheduler::SplitStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1SplitStepNode.html" title="Split step that corresponds to te::Stage::split with additional support of m [...]
-<tr id="row_112_147_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1StorageAlignStep.html" target="_self">tvm::auto_scheduler::StorageAlignStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1StorageAlignStepNode.html" title="Storage align step that corresponds to te::Stage::stora [...]
-<tr id="row_112_148_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1TuningOptions.html" target="_self">tvm::auto_scheduler::TuningOptions</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1TuningOptionsNode.html" title="Tuning and measurement options. ">TuningOptionsNode</a> </td></tr>
-<tr id="row_112_149_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_149_" class="arrow" onclick="toggleFolder('112_149_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1BaseExpr.html" target="_self">tvm::BaseExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1BaseExprNode.html" title="Base type of all the expressions. ">BaseExprNode< [...]
-<tr id="row_112_149_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_149_0_" class="arrow" onclick="toggleFolder('112_149_0_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimExpr.html" target="_self">tvm::PrimExpr</a></td><td class="desc">Reference to <a class="el" href="classtvm_1_1PrimExprNode.html" title="Base node of all primitive expressions. ">PrimExprN [...]
-<tr id="row_112_149_0_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_112_149_0_0_" class="arrow" onclick="toggleFolder('112_149_0_0_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IterMapExpr.html" target="_self">tvm::arith::IterMapExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IterMapExprNode.html" title="Bas [...]
-<tr id="row_112_149_0_0_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IterSplitExpr.html" target="_self">tvm::arith::IterSplitExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IterSplitExprNode.html" title="Split of an iterator. ">IterSplitExprNode</a> </td></tr>
-<tr id="row_112_149_0_0_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IterSumExpr.html" target="_self">tvm::arith::IterSumExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IterSumExprNode.html" title="Fuse multiple iterators by summing them with scaling. ">IterSumExprNode</a> </td></tr>
-<tr id="row_112_149_0_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FloatImm.html" target="_self">tvm::FloatImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1FloatImmNode.html" title="Constant floating point literals in the program. ">FloatImmNode</a> </td></tr>
-<tr id="row_112_149_0_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_112_149_0_2_" class="arrow" onclick="toggleFolder('112_149_0_2_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IntImm.html" target="_self">tvm::IntImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IntImmNode.html" title="Constant integer literals in the prog [...]
-<tr id="row_112_149_0_2_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Bool.html" target="_self">tvm::Bool</a></td><td class="desc">Boolean constant </td></tr>
-<tr id="row_112_149_0_2_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Integer.html" target="_self">tvm::Integer</a></td><td class="desc">Container of constant int that adds more constructors </td></tr>
-<tr id="row_112_149_0_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Add.html" target="_self">tvm::tir::Add</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AddNode.html" title="a + b ">AddNode</a> </td></tr>
-<tr id="row_112_149_0_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1And.html" target="_self">tvm::tir::And</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AndNode.html" title="a &amp;&amp; b ">AndNode</a> </td></tr>
-<tr id="row_112_149_0_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Any.html" target="_self">tvm::tir::Any</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AnyNode.html" title="Any shape. ">AnyNode</a> </td></tr>
-<tr id="row_112_149_0_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Broadcast.html" target="_self">tvm::tir::Broadcast</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BroadcastNode.html" title="Create a vector where all the elements are value. ">BroadcastNode</a> </td></tr>
-<tr id="row_112_149_0_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferLoad.html" target="_self">tvm::tir::BufferLoad</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferLoadNode.html" title="Load value from the high dimension buffer. ">BufferLoadNode</a> </td></tr>
-<tr id="row_112_149_0_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Call.html" target="_self">tvm::tir::Call</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1CallNode.html" title="Call node. ">CallNode</a> </td></tr>
-<tr id="row_112_149_0_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Cast.html" target="_self">tvm::tir::Cast</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1CastNode.html" title="Cast value from one data type to another. ">CastNode</a> </td></tr>
-<tr id="row_112_149_0_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Div.html" target="_self">tvm::tir::Div</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1DivNode.html" title="a / b in the C semnatics. ">DivNode</a> </td></tr>
-<tr id="row_112_149_0_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1EQ.html" target="_self">tvm::tir::EQ</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1EQNode.html" title="a == b ">EQNode</a> </td></tr>
-<tr id="row_112_149_0_12_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FloorDiv.html" target="_self">tvm::tir::FloorDiv</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1FloorDivNode.html" title="Floor division, floor(a/b) ">FloorDivNode</a> </td></tr>
-<tr id="row_112_149_0_13_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FloorMod.html" target="_self">tvm::tir::FloorMod</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1FloorModNode.html" title="The remainder of the floordiv. ">FloorModNode</a> </td></tr>
-<tr id="row_112_149_0_14_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1GE.html" target="_self">tvm::tir::GE</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1GENode.html" title="a &gt;= b ">GENode</a> </td></tr>
-<tr id="row_112_149_0_15_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1GT.html" target="_self">tvm::tir::GT</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1GTNode.html" title="a &gt; b ">GTNode</a> </td></tr>
-<tr id="row_112_149_0_16_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LE.html" target="_self">tvm::tir::LE</a></td><td class="desc">Managed reference to <a class="el" href="structtvm_1_1tir_1_1LENode.html" title="a &lt;= b ">LENode</a> </td></tr>
-<tr id="row_112_149_0_17_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Let.html" target="_self">tvm::tir::Let</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LetNode.html" title="Let binding. Bind var to value then evaluate body. ">LetNode</a> </td></tr>
-<tr id="row_112_149_0_18_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Load.html" target="_self">tvm::tir::Load</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LoadNode.html" title="Load the value from buffer_var. ">LoadNode</a> </td></tr>
-<tr id="row_112_149_0_19_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LT.html" target="_self">tvm::tir::LT</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LTNode.html" title="a &lt; b ">LTNode</a> </td></tr>
-<tr id="row_112_149_0_20_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Max.html" target="_self">tvm::tir::Max</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1MaxNode.html" title="max(a, b) ">MaxNode</a> </td></tr>
-<tr id="row_112_149_0_21_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Min.html" target="_self">tvm::tir::Min</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1MinNode.html" title="min(a, b) ">MinNode</a> </td></tr>
-<tr id="row_112_149_0_22_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Mod.html" target="_self">tvm::tir::Mod</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ModNode.html" title="a % b in the C semnatics. ">ModNode</a> </td></tr>
-<tr id="row_112_149_0_23_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Mul.html" target="_self">tvm::tir::Mul</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1MulNode.html" title="a * b ">MulNode</a> </td></tr>
-<tr id="row_112_149_0_24_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1NE.html" target="_self">tvm::tir::NE</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1NENode.html" title="a != b ">NENode</a> </td></tr>
-<tr id="row_112_149_0_25_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Not.html" target="_self">tvm::tir::Not</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1NotNode.html" title="!a ">NotNode</a> </td></tr>
-<tr id="row_112_149_0_26_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Or.html" target="_self">tvm::tir::Or</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1OrNode.html" title="a || b ">OrNode</a> </td></tr>
-<tr id="row_112_149_0_27_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ProducerLoad.html" target="_self">tvm::tir::ProducerLoad</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ProducerLoadNode.html" title="Load value from the result produced by the producer. ">ProducerLoadNode</a> </td></tr>
-<tr id="row_112_149_0_28_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Ramp.html" target="_self">tvm::tir::Ramp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1RampNode.html" title="Construct a vector with lanes elements where its i-th element equals base + i * stride. This is useful to construct a index  [...]
-<tr id="row_112_149_0_29_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Reduce.html" target="_self">tvm::tir::Reduce</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ReduceNode.html" title="Reduction operator operator. ">ReduceNode</a> </td></tr>
-<tr id="row_112_149_0_30_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Select.html" target="_self">tvm::tir::Select</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1SelectNode.html" title="return true_value if condition is true, otherwise return false_value. ">SelectNode</a> </td></tr>
-<tr id="row_112_149_0_31_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Shuffle.html" target="_self">tvm::tir::Shuffle</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ShuffleNode.html" title="Shuffle instruction. vec = concat(vectors) result = (vec[indices[0]], vec[indices[1]] ...">ShuffleNode</a> </td></tr>
-<tr id="row_112_149_0_32_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StringImm.html" target="_self">tvm::tir::StringImm</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1StringImmNode.html" title="String constants, only used in asserts. ">StringImmNode</a> </td></tr>
-<tr id="row_112_149_0_33_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Sub.html" target="_self">tvm::tir::Sub</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1SubNode.html" title="a - b ">SubNode</a> </td></tr>
-<tr id="row_112_149_0_34_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_112_149_0_34_" class="arrow" onclick="toggleFolder('112_149_0_34_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Var.html" target="_self">tvm::tir::Var</a></td><td class="desc">Named variable in TIR </td></tr>
-<tr id="row_112_149_0_34_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SizeVar.html" target="_self">tvm::tir::SizeVar</a></td><td class="desc">Named variable represents a tensor index size </td></tr>
-<tr id="row_112_149_1_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_149_1_" class="arrow" onclick="toggleFolder('112_149_1_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayExpr.html" target="_self">tvm::RelayExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayExprNode.html" title="Base node of all non-primitive expressio [...]
-<tr id="row_112_149_1_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_112_149_1_0_" class="arrow" onclick="toggleFolder('112_149_1_0_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1BaseFunc.html" target="_self">tvm::BaseFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1BaseFuncNode.html" title="Base node of all functions. ">BaseFun [...]
-<tr id="row_112_149_1_0_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Function.html" target="_self">tvm::relay::Function</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" title="Relay Function container. ">FunctionNode</a> </td></tr>
-<tr id="row_112_149_1_0_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1PrimFunc.html" target="_self">tvm::tir::PrimFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1PrimFuncNode.html" title="Primitive functions that contains TIR statements. ">PrimFuncNode</a> </td></tr>
-<tr id="row_112_149_1_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Constructor.html" target="_self">tvm::Constructor</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1ConstructorNode.html" title="ADT constructor. Constructors compare by pointer equality. ">ConstructorNode</a> </td></tr>
-<tr id="row_112_149_1_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVar.html" target="_self">tvm::GlobalVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalVarNode.html" title="Global variable that lives in the top-level module. ">GlobalVarNode</a> </td></tr>
-<tr id="row_112_149_1_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Op.html" target="_self">tvm::Op</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1OpNode.html" title="Primitive Op(builtin intrinsics) ">OpNode</a> </td></tr>
-<tr id="row_112_149_1_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Call.html" target="_self">tvm::relay::Call</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Constant.html" target="_self">tvm::relay::Constant</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1If.html" target="_self">tvm::relay::If</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Let.html" target="_self">tvm::relay::Let</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Match.html" target="_self">tvm::relay::Match</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefCreate.html" target="_self">tvm::relay::RefCreate</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefRead.html" target="_self">tvm::relay::RefRead</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefWrite.html" target="_self">tvm::relay::RefWrite</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_12_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TempExpr.html" target="_self">tvm::relay::TempExpr</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_13_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Tuple.html" target="_self">tvm::relay::Tuple</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_14_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItem.html" target="_self">tvm::relay::TupleGetItem</a></td><td class="desc"></td></tr>
-<tr id="row_112_149_1_15_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Var.html" target="_self">tvm::relay::Var</a></td><td class="desc"></td></tr>
-<tr id="row_112_150_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1CompilationConfig.html" target="_self">tvm::CompilationConfig</a></td><td class="desc">Managed reference class to <code><a class="el" href="classtvm_1_1CompilationConfig.html" title="Managed reference class to CompilationConfig. ">CompilationConfig</a></code> </td></tr>
-<tr id="row_112_151_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantInfo.html" target="_self">tvm::ConstantInfo</a></td><td class="desc"></td></tr>
-<tr id="row_112_152_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantMemoryPools.html" target="_self">tvm::ConstantMemoryPools</a></td><td class="desc"></td></tr>
-<tr id="row_112_153_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Diagnostic.html" target="_self">tvm::Diagnostic</a></td><td class="desc"></td></tr>
-<tr id="row_112_154_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticContext.html" target="_self">tvm::DiagnosticContext</a></td><td class="desc"></td></tr>
-<tr id="row_112_155_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticRenderer.html" target="_self">tvm::DiagnosticRenderer</a></td><td class="desc"></td></tr>
-<tr id="row_112_156_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1EnvFunc.html" target="_self">tvm::EnvFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1EnvFuncNode.html" title="A serializable function backed by TVM&#39;s global environment. ">EnvFuncNode</a> </td></tr>
-<tr id="row_112_157_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GenericFunc.html" target="_self">tvm::GenericFunc</a></td><td class="desc">Generic function that can be specialized on a per-target basis </td></tr>
-<tr id="row_112_158_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarSupply.html" target="_self">tvm::GlobalVarSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1GlobalVarSupplyNode.html" title="GlobalVarSupply can be used to generate unique GlobalVars. ">GlobalVarSupplyNode</a> </td></tr>
-<tr id="row_112_159_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1instrument_1_1PassInstrument.html" target="_self">tvm::instrument::PassInstrument</a></td><td class="desc">Managed reference class for <a class="el" href="classtvm_1_1instrument_1_1PassInstrumentNode.html" title="PassInstrumentNode forms an instrument implementation. It provides API  [...]
-<tr id="row_112_160_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IRModule.html" target="_self">tvm::IRModule</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IRModuleNode.html" title="IRModule that holds functions and type definitions. ">IRModuleNode</a> </td></tr>
-<tr id="row_112_161_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MemoryInfo.html" target="_self">tvm::MemoryInfo</a></td><td class="desc">Defines memory info </td></tr>
-<tr id="row_112_162_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_162_" class="arrow" onclick="toggleFolder('112_162_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1ArgInfo.html" target="_self">tvm::meta_schedule::ArgInfo</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1ArgInfoNode.html" title="T [...]
-<tr id="row_112_162_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1TensorInfo.html" target="_self">tvm::meta_schedule::TensorInfo</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1TensorInfoNode.html" title="The tensor argument information. ">TensorInfoNode</a> </td></tr>
-<tr id="row_112_163_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Builder.html" target="_self">tvm::meta_schedule::Builder</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1BuilderNode.html" title="The abstract builder interface. ">BuilderNode</a> </td></tr>
-<tr id="row_112_164_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInput.html" target="_self">tvm::meta_schedule::BuilderInput</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInputNode.html" title="The builder&#39;s input, containing an IRModule and the target. ">Builder [...]
-<tr id="row_112_165_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderResult.html" target="_self">tvm::meta_schedule::BuilderResult</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1BuilderResultNode.html" title="The builder&#39;s output, containing the artifact path or error messag [...]
-<tr id="row_112_166_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1CostModel.html" target="_self">tvm::meta_schedule::CostModel</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1CostModelNode.html" title="Cost model. ">CostModelNode</a> </td></tr>
-<tr id="row_112_167_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Database.html" target="_self">tvm::meta_schedule::Database</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1DatabaseNode.html">DatabaseNode</a> </td></tr>
-<tr id="row_112_168_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1ExtractedTask.html" target="_self">tvm::meta_schedule::ExtractedTask</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1ExtractedTaskNode.html" title="A tuning task extracted from the high-level IR. ">ExtractedTaskNode</a [...]
-<tr id="row_112_169_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1FeatureExtractor.html" target="_self">tvm::meta_schedule::FeatureExtractor</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1FeatureExtractorNode.html" title="Extractor for features from measure candidates for use in cos [...]
-<tr id="row_112_170_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCallback.html" target="_self">tvm::meta_schedule::MeasureCallback</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCallbackNode.html" title="Rules to apply after measure results is available. ">MeasureCall [...]
-<tr id="row_112_171_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCandidate.html" target="_self">tvm::meta_schedule::MeasureCandidate</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCandidateNode.html" title="The schedule (with input shapes) to be measured. ">MeasureCan [...]
-<tr id="row_112_172_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Mutator.html" target="_self">tvm::meta_schedule::Mutator</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1MutatorNode.html" title="Mutator is designed to mutate the trace to explore the design space. ">MutatorNode</a> < [...]
-<tr id="row_112_173_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Postproc.html" target="_self">tvm::meta_schedule::Postproc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1PostprocNode.html" title="Rules to apply a postprocessor to a schedule. ">PostprocNode</a> </td></tr>
-<tr id="row_112_174_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Profiler.html" target="_self">tvm::meta_schedule::Profiler</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1ProfilerNode.html" title="A generic profiler. ">ProfilerNode</a> </td></tr>
-<tr id="row_112_175_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Runner.html" target="_self">tvm::meta_schedule::Runner</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1RunnerNode.html" title="The abstract runner interface. ">RunnerNode</a> </td></tr>
-<tr id="row_112_176_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1RunnerFuture.html" target="_self">tvm::meta_schedule::RunnerFuture</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1RunnerFutureNode.html" title="A class to asynchronously fetch runner&#39;s output. ">RunnerFutureNode</ [...]
-<tr id="row_112_177_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1RunnerInput.html" target="_self">tvm::meta_schedule::RunnerInput</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1RunnerInputNode.html" title="Runner&#39;s input containing path of artifact, type of device and argument  [...]
-<tr id="row_112_178_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1RunnerResult.html" target="_self">tvm::meta_schedule::RunnerResult</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1RunnerResultNode.html" title="Runner&#39;s output containing measurement result of MeasureCandidate or  [...]
-<tr id="row_112_179_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1ScheduleRule.html" target="_self">tvm::meta_schedule::ScheduleRule</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1ScheduleRuleNode.html" title="Rules to modify a block in a schedule. ">ScheduleRuleNode</a> </td></tr>
-<tr id="row_112_180_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1SearchStrategy.html" target="_self">tvm::meta_schedule::SearchStrategy</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1SearchStrategyNode.html" title="The search strategy for measure candidates generation. ">SearchStra [...]
-<tr id="row_112_181_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1SpaceGenerator.html" target="_self">tvm::meta_schedule::SpaceGenerator</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1SpaceGeneratorNode.html" title="The abstract class for design space generation. ">SpaceGeneratorNod [...]
-<tr id="row_112_182_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1TaskRecord.html" target="_self">tvm::meta_schedule::TaskRecord</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1TaskRecordNode.html">TaskRecordNode</a> </td></tr>
-<tr id="row_112_183_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1TaskScheduler.html" target="_self">tvm::meta_schedule::TaskScheduler</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1TaskSchedulerNode.html" title="The abstract interface of task schedulers. ">TaskSchedulerNode</a> </td></tr>
-<tr id="row_112_184_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1TuneContext.html" target="_self">tvm::meta_schedule::TuneContext</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1TuneContextNode.html" title="The auto tuning context. ">TuneContextNode</a> </td></tr>
-<tr id="row_112_185_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1TuningRecord.html" target="_self">tvm::meta_schedule::TuningRecord</a></td><td class="desc">The managed reference of <a class="el" href="classtvm_1_1meta__schedule_1_1TuningRecordNode.html" title="The class of tuning records. ">TuningRecordNode</a> </td></tr>
-<tr id="row_112_186_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Workload.html" target="_self">tvm::meta_schedule::Workload</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1WorkloadNode.html" title="A workload, i.e. an IRModule and its structural hash. ">WorkloadNode</a> </td></tr>
-<tr id="row_112_187_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NameSupply.html" target="_self">tvm::NameSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1NameSupplyNode.html" title="NameSupply can be used to generate unique names. ">NameSupplyNode</a> </td></tr>
-<tr id="row_112_188_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_188_" class="arrow" onclick="toggleFolder('112_188_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPath.html" target="_self">tvm::ObjectPath</a></td><td class="desc"></td></tr>
-<tr id="row_112_188_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ArrayIndexPath.html" target="_self">tvm::ArrayIndexPath</a></td><td class="desc"></td></tr>
-<tr id="row_112_188_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1AttributeAccessPath.html" target="_self">tvm::AttributeAccessPath</a></td><td class="desc"></td></tr>
-<tr id="row_112_188_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MapValuePath.html" target="_self">tvm::MapValuePath</a></td><td class="desc"></td></tr>
-<tr id="row_112_188_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingArrayElementPath.html" target="_self">tvm::MissingArrayElementPath</a></td><td class="desc"></td></tr>
-<tr id="row_112_188_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingMapEntryPath.html" target="_self">tvm::MissingMapEntryPath</a></td><td class="desc"></td></tr>
-<tr id="row_112_188_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RootPath.html" target="_self">tvm::RootPath</a></td><td class="desc"></td></tr>
-<tr id="row_112_188_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1UnknownAttributeAccessPath.html" target="_self">tvm::UnknownAttributeAccessPath</a></td><td class="desc"></td></tr>
-<tr id="row_112_189_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathPair.html" target="_self">tvm::ObjectPathPair</a></td><td class="desc"></td></tr>
-<tr id="row_112_190_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1parser_1_1Source.html" target="_self">tvm::parser::Source</a></td><td class="desc"></td></tr>
-<tr id="row_112_191_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1parser_1_1SourceMap.html" target="_self">tvm::parser::SourceMap</a></td><td class="desc"></td></tr>
-<tr id="row_112_192_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_192_" class="arrow" onclick="toggleFolder('112_192_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfo.html" target="_self">tvm::PoolInfo</a></td><td class="desc">Base class for <a class="el" href="classtvm_1_1WorkspacePoolInfo.html">WorkspacePoolInfo</a> and <a class="el" href="classtvm_1_ [...]
-<tr id="row_112_192_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantPoolInfo.html" target="_self">tvm::ConstantPoolInfo</a></td><td class="desc"></td></tr>
-<tr id="row_112_192_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspacePoolInfo.html" target="_self">tvm::WorkspacePoolInfo</a></td><td class="desc"></td></tr>
-<tr id="row_112_193_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfoProperties.html" target="_self">tvm::PoolInfoProperties</a></td><td class="desc"></td></tr>
-<tr id="row_112_194_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Range.html" target="_self">tvm::Range</a></td><td class="desc"><a class="el" href="classtvm_1_1Range.html" title="Range constainer. ">Range</a> constainer </td></tr>
-<tr id="row_112_195_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Clause.html" target="_self">tvm::relay::Clause</a></td><td class="desc"></td></tr>
-<tr id="row_112_196_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstructorValue.html" target="_self">tvm::relay::ConstructorValue</a></td><td class="desc"></td></tr>
-<tr id="row_112_197_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_197_" class="arrow" onclick="toggleFolder('112_197_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPattern.html" target="_self">tvm::relay::DFPattern</a></td><td class="desc">Managed reference to dataflow patterns </td></tr>
-<tr id="row_112_197_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1AltPattern.html" target="_self">tvm::relay::AltPattern</a></td><td class="desc">A pattern which matches either of two patterns </td></tr>
-<tr id="row_112_197_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1AttrPattern.html" target="_self">tvm::relay::AttrPattern</a></td><td class="desc">A pattern which matches attributes in another pattern </td></tr>
-<tr id="row_112_197_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1CallPattern.html" target="_self">tvm::relay::CallPattern</a></td><td class="desc"></td></tr>
-<tr id="row_112_197_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstantPattern.html" target="_self">tvm::relay::ConstantPattern</a></td><td class="desc"></td></tr>
-<tr id="row_112_197_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DataTypePattern.html" target="_self">tvm::relay::DataTypePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
-<tr id="row_112_197_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DominatorPattern.html" target="_self">tvm::relay::DominatorPattern</a></td><td class="desc">A pattern which matches a variable length dominator path </td></tr>
-<tr id="row_112_197_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprPattern.html" target="_self">tvm::relay::ExprPattern</a></td><td class="desc">A pattern which matches a literal expression </td></tr>
-<tr id="row_112_197_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FunctionPattern.html" target="_self">tvm::relay::FunctionPattern</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" title="Relay Function container. ">FunctionNode</a> </td></tr>
-<tr id="row_112_197_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IfPattern.html" target="_self">tvm::relay::IfPattern</a></td><td class="desc"></td></tr>
-<tr id="row_112_197_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1LetPattern.html" target="_self">tvm::relay::LetPattern</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Let.html">Let</a> binding that binds a local var </td></tr>
-<tr id="row_112_197_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ShapePattern.html" target="_self">tvm::relay::ShapePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
-<tr id="row_112_197_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItemPattern.html" target="_self">tvm::relay::TupleGetItemPattern</a></td><td class="desc"></td></tr>
-<tr id="row_112_197_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TuplePattern.html" target="_self">tvm::relay::TuplePattern</a></td><td class="desc"></td></tr>
-<tr id="row_112_197_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TypePattern.html" target="_self">tvm::relay::TypePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
-<tr id="row_112_197_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1VarPattern.html" target="_self">tvm::relay::VarPattern</a></td><td class="desc"></td></tr>
-<tr id="row_112_197_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1WildcardPattern.html" target="_self">tvm::relay::WildcardPattern</a></td><td class="desc">A pattern which matches anything </td></tr>
-<tr id="row_112_198_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternCallback.html" target="_self">tvm::relay::DFPatternCallback</a></td><td class="desc">Managed reference to dataflow pattern callbacks </td></tr>
-<tr id="row_112_199_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Executor.html" target="_self">tvm::relay::Executor</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1relay_1_1ExecutorNode.html" title="Executor information. ">ExecutorNode</a> </td></tr>
-<tr id="row_112_200_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Id.html" target="_self">tvm::relay::Id</a></td><td class="desc"></td></tr>
-<tr id="row_112_201_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html" target="_self">tvm::relay::OpImplementation</a></td><td class="desc">Operator implementation class </td></tr>
-<tr id="row_112_202_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpSpecialization.html" target="_self">tvm::relay::OpSpecialization</a></td><td class="desc">Operator specialization class </td></tr>
-<tr id="row_112_203_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpStrategy.html" target="_self">tvm::relay::OpStrategy</a></td><td class="desc">Operator strategy class </td></tr>
-<tr id="row_112_204_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_204_" class="arrow" onclick="toggleFolder('112_204_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Pattern.html" target="_self">tvm::relay::Pattern</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in [...]
-<tr id="row_112_204_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternConstructor.html" target="_self">tvm::relay::PatternConstructor</a></td><td class="desc"></td></tr>
-<tr id="row_112_204_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternTuple.html" target="_self">tvm::relay::PatternTuple</a></td><td class="desc"></td></tr>
-<tr id="row_112_204_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVar.html" target="_self">tvm::relay::PatternVar</a></td><td class="desc"></td></tr>
-<tr id="row_112_204_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html" target="_self">tvm::relay::PatternWildcard</a></td><td class="desc"></td></tr>
-<tr id="row_112_205_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RecClosure.html" target="_self">tvm::relay::RecClosure</a></td><td class="desc"></td></tr>
-<tr id="row_112_206_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefValue.html" target="_self">tvm::relay::RefValue</a></td><td class="desc"></td></tr>
-<tr id="row_112_207_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Runtime.html" target="_self">tvm::relay::Runtime</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1relay_1_1RuntimeNode.html" title="Runtime information. ">RuntimeNode</a> </td></tr>
-<tr id="row_112_208_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1ADT.html" target="_self">tvm::runtime::ADT</a></td><td class="desc">Reference to algebraic data type objects </td></tr>
-<tr id="row_112_209_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Array.html" target="_self">tvm::runtime::Array&lt; T, typename &gt;</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Array.html" title="Array, container representing a contiguous sequence of ObjectRefs. ">Array</a>, container representing a contiguous se [...]
-<tr id="row_112_210_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_210_" class="arrow" onclick="toggleFolder('112_210_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Closure.html" target="_self">tvm::runtime::Closure</a></td><td class="desc">Reference to closure </td></tr>
-<tr id="row_112_210_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1InterpreterClosure.html" target="_self">tvm::relay::InterpreterClosure</a></td><td class="desc"></td></tr>
-<tr id="row_112_210_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1vm_1_1VMClosure.html" target="_self">tvm::runtime::vm::VMClosure</a></td><td class="desc">Reference to closure </td></tr>
-<tr id="row_112_211_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Map.html" target="_self">tvm::runtime::Map&lt; K, V, typename, typename &gt;</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Map.html" title="Map container of NodeRef-&gt;NodeRef in DSL graph. Map implements copy on write semantics, which means map is m [...]
-<tr id="row_112_212_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_212_" class="arrow" onclick="toggleFolder('112_212_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataBase.html" target="_self">tvm::runtime::metadata::MetadataBase</a></td><td class="desc">Reference class for the common <a class="el" href="classtvm_1_1runtime_1_1meta [...]
-<tr id="row_112_212_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ConstantInfoMetadata.html" target="_self">tvm::runtime::metadata::ConstantInfoMetadata</a></td><td class="desc"></td></tr>
-<tr id="row_112_212_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1Metadata.html" target="_self">tvm::runtime::metadata::Metadata</a></td><td class="desc"></td></tr>
-<tr id="row_112_212_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataArray.html" target="_self">tvm::runtime::metadata::MetadataArray</a></td><td class="desc">Reference class for <a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataArray.html" title="Reference class for MetadataArray. ">MetadataArray</a> </td></tr>
-<tr id="row_112_212_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1TensorInfo.html" target="_self">tvm::runtime::metadata::TensorInfo</a></td><td class="desc"></td></tr>
-<tr id="row_112_213_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Module.html" target="_self">tvm::runtime::Module</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Module.html" title="Module container of TVM. ">Module</a> container of TVM </td></tr>
-<tr id="row_112_214_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1NDArray.html" target="_self">tvm::runtime::NDArray</a></td><td class="desc">Managed <a class="el" href="classtvm_1_1runtime_1_1NDArray.html" title="Managed NDArray. The array is backed by reference counted blocks. ">NDArray</a>. The array is backed by reference counted blo [...]
-<tr id="row_112_215_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; T &gt;</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Optional.html" title="Optional container that to represent to a Nullable variant of T. ">Optional</a> container that to represent to a Nullab [...]
-<tr id="row_112_216_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1PackedFunc.html" target="_self">tvm::runtime::PackedFunc</a></td><td class="desc">Packed function is a type-erased function. The arguments are passed by packed format </td></tr>
-<tr id="row_112_217_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1DeviceWrapper.html" target="_self">tvm::runtime::profiling::DeviceWrapper</a></td><td class="desc">Wrapper for <code>Device</code> </td></tr>
-<tr id="row_112_218_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1MetricCollector.html" target="_self">tvm::runtime::profiling::MetricCollector</a></td><td class="desc">Wrapper for <code><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1MetricCollectorNode.html" title="Interface for user defined profiling metric collec [...]
-<tr id="row_112_219_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1Report.html" target="_self">tvm::runtime::profiling::Report</a></td><td class="desc"></td></tr>
-<tr id="row_112_220_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1ShapeTuple.html" target="_self">tvm::runtime::ShapeTuple</a></td><td class="desc">Reference to shape tuple objects </td></tr>
-<tr id="row_112_221_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1String.html" target="_self">tvm::runtime::String</a></td><td class="desc">Reference to string objects </td></tr>
-<tr id="row_112_222_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Timer.html" target="_self">tvm::runtime::Timer</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Timer.html" title="Timer for a specific device. ">Timer</a> for a specific device </td></tr>
-<tr id="row_112_223_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Storage.html" target="_self">tvm::runtime::vm::Storage</a></td><td class="desc">Reference to storage </td></tr>
-<tr id="row_112_224_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilder.html" target="_self">tvm::script::ir_builder::IRBuilder</a></td><td class="desc">Managed reference to an <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderNode.html" title="A dialect-agnostic IRBuilder that constructs any IR of TVM.  [...]
-<tr id="row_112_225_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_225_" class="arrow" onclick="toggleFolder('112_225_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html" target="_self">tvm::script::ir_builder::IRBuilderFrame</a></td><td class="desc">Managed reference to an <a class="el" href="classtvm_1_1script_1_1ir__b [...]
-<tr id="row_112_225_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1ir_1_1IRModuleFrame.html" target="_self">tvm::script::ir_builder::ir::IRModuleFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1ir_1_1IRModuleFrameNode.html" title="A frame that represents the IRMod [...]
-<tr id="row_112_225_1_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_225_1_" class="arrow" onclick="toggleFolder('112_225_1_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1TIRFrame.html" target="_self">tvm::script::ir_builder::tir::TIRFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir [...]
-<tr id="row_112_225_1_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AllocateConstFrame.html" target="_self">tvm::script::ir_builder::tir::AllocateConstFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AllocateConstFrameNode.html" title="A frame repre [...]
-<tr id="row_112_225_1_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AllocateFrame.html" target="_self">tvm::script::ir_builder::tir::AllocateFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AllocateFrameNode.html" title="A frame represents the alloc [...]
-<tr id="row_112_225_1_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AssertFrame.html" target="_self">tvm::script::ir_builder::tir::AssertFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AssertFrameNode.html" title="A frame that represents the assert [...]
-<tr id="row_112_225_1_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AttrFrame.html" target="_self">tvm::script::ir_builder::tir::AttrFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AttrFrameNode.html" title="A frame that represents attribute node.  [...]
-<tr id="row_112_225_1_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1BlockFrame.html" target="_self">tvm::script::ir_builder::tir::BlockFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1BlockFrameNode.html" title="A frame that represents the block. "> [...]
-<tr id="row_112_225_1_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1BlockInitFrame.html" target="_self">tvm::script::ir_builder::tir::BlockInitFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1BlockInitFrameNode.html" title="A frame that represents t [...]
-<tr id="row_112_225_1_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1DeclBufferFrame.html" target="_self">tvm::script::ir_builder::tir::DeclBufferFrame</a></td><td class="desc"></td></tr>
-<tr id="row_112_225_1_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ElseFrame.html" target="_self">tvm::script::ir_builder::tir::ElseFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ElseFrameNode.html" title="A frame that represents else. ">ElseFram [...]
-<tr id="row_112_225_1_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ForFrame.html" target="_self">tvm::script::ir_builder::tir::ForFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ForFrameNode.html" title="A frame that represents the for loop. ">For [...]
-<tr id="row_112_225_1_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1IfFrame.html" target="_self">tvm::script::ir_builder::tir::IfFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1IfFrameNode.html" title="A frame that represents if statement. ">IfFram [...]
-<tr id="row_112_225_1_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1LaunchThreadFrame.html" target="_self">tvm::script::ir_builder::tir::LaunchThreadFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1LaunchThreadFrameNode.html" title="The LaunchThrea [...]
-<tr id="row_112_225_1_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1LetFrame.html" target="_self">tvm::script::ir_builder::tir::LetFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1LetFrameNode.html" title="A frame represents the let binding express [...]
-<tr id="row_112_225_1_12_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1PrimFuncFrame.html" target="_self">tvm::script::ir_builder::tir::PrimFuncFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1PrimFuncFrameNode.html" title="A frame that represents the [...]
-<tr id="row_112_225_1_13_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1RealizeFrame.html" target="_self">tvm::script::ir_builder::tir::RealizeFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1RealizeFrameNode.html" title="A frame that represents realiz [...]
-<tr id="row_112_225_1_14_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ThenFrame.html" target="_self">tvm::script::ir_builder::tir::ThenFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ThenFrameNode.html" title="A frame that represents then. ">ThenFra [...]
-<tr id="row_112_225_1_15_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1WhileFrame.html" target="_self">tvm::script::ir_builder::tir::WhileFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1WhileFrameNode.html" title="A frame that represents while loop.  [...]
-<tr id="row_112_226_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_226_" class="arrow" onclick="toggleFolder('112_226_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" target="_self">tvm::script::printer::Doc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1DocNode.html" title="The base [...]
-<tr id="row_112_226_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_226_0_" class="arrow" onclick="toggleFolder('112_226_0_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDoc.html" target="_self">tvm::script::printer::ExprDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDocNode.htm [...]
-<tr id="row_112_226_0_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDoc.html" target="_self">tvm::script::printer::AttrAccessDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDocNode.html" title="Doc that represents attribute access on another expression. " [...]
-<tr id="row_112_226_0_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1CallDoc.html" target="_self">tvm::script::printer::CallDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1CallDocNode.html" title="Doc that represents function call. ">CallDocNode</a> </td></tr>
-<tr id="row_112_226_0_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDoc.html" target="_self">tvm::script::printer::DictDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1DictDocNode.html" title="Doc that represents dictionary literal. ">DictDocNode</a> </td></tr>
-<tr id="row_112_226_0_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IdDoc.html" target="_self">tvm::script::printer::IdDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IdDocNode.html" title="Doc that represents identifier. ">IdDocNode</a> </td></tr>
-<tr id="row_112_226_0_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDoc.html" target="_self">tvm::script::printer::IndexDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDocNode.html" title="Doc that represents index access on another expression. ">IndexDocNode</a> < [...]
-<tr id="row_112_226_0_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDoc.html" target="_self">tvm::script::printer::LambdaDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDocNode.html" title="Doc that represents anonymous function. ">LambdaDocNode</a> </td></tr>
-<tr id="row_112_226_0_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDoc.html" target="_self">tvm::script::printer::ListDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ListDocNode.html" title="Doc that represents list literal. ">ListDocNode</a> </td></tr>
-<tr id="row_112_226_0_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDoc.html" target="_self">tvm::script::printer::LiteralDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDocNode.html" title="Doc that represents literal value. ">LiteralDocNode</a> </td></tr>
-<tr id="row_112_226_0_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDoc.html" target="_self">tvm::script::printer::OperationDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDocNode.html" title="Doc that represents operation. ">OperationDocNode</a> </td></tr>
-<tr id="row_112_226_0_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1TupleDoc.html" target="_self">tvm::script::printer::TupleDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1TupleDocNode.html" title="Doc that represents tuple literal. ">TupleDocNode</a> </td></tr>
-<tr id="row_112_226_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1SliceDoc.html" target="_self">tvm::script::printer::SliceDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1SliceDocNode.html" title="Doc that represents slice in Index expression. ">SliceDocNode</a> </td></tr>
-<tr id="row_112_226_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtBlockDoc.html" target="_self">tvm::script::printer::StmtBlockDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1StmtBlockDocNode.html" title="The container doc that holds a list of StmtDoc. ">StmtBlockDocNode [...]
-<tr id="row_112_226_3_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_226_3_" class="arrow" onclick="toggleFolder('112_226_3_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDoc.html" target="_self">tvm::script::printer::StmtDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDocNode.htm [...]
-<tr id="row_112_226_3_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDoc.html" target="_self">tvm::script::printer::AssertDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDocNode.html" title="Doc that represents assert statement. ">AssertDocNode</a> </td></tr>
-<tr id="row_112_226_3_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDoc.html" target="_self">tvm::script::printer::AssignDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDocNode.html" title="Doc that represents assign statement. ">AssignDocNode</a> </td></tr>
-<tr id="row_112_226_3_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDoc.html" target="_self">tvm::script::printer::ClassDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDocNode.html" title="Doc that represents class definition. ">ClassDocNode</a> </td></tr>
-<tr id="row_112_226_3_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDoc.html" target="_self">tvm::script::printer::ExprStmtDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDocNode.html" title="Doc that represents an expression as statement. ">ExprStmtDocNode</ [...]
-<tr id="row_112_226_3_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ForDoc.html" target="_self">tvm::script::printer::ForDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ForDocNode.html" title="Doc that represents for statement. ">ForDocNode</a> </td></tr>
-<tr id="row_112_226_3_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDoc.html" target="_self">tvm::script::printer::FunctionDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDocNode.html" title="Doc that represents function definition. ">FunctionDocNode</a> </td></tr>
-<tr id="row_112_226_3_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IfDoc.html" target="_self">tvm::script::printer::IfDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IfDocNode.html" title="Doc that represent if-then-else statement. ">IfDocNode</a> </td></tr>
-<tr id="row_112_226_3_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ReturnDoc.html" target="_self">tvm::script::printer::ReturnDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ReturnDocNode.html" title="Doc that represents return statement. ">ReturnDocNode</a> </td></tr>
-<tr id="row_112_226_3_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDoc.html" target="_self">tvm::script::printer::ScopeDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDocNode.html" title="Doc that represents special scopes. ">ScopeDocNode</a> </td></tr>
-<tr id="row_112_226_3_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1WhileDoc.html" target="_self">tvm::script::printer::WhileDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1WhileDocNode.html" title="Doc that represents while statement. ">WhileDocNode</a> </td></tr>
-<tr id="row_112_227_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_227_" class="arrow" onclick="toggleFolder('112_227_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1Frame.html" target="_self">tvm::script::printer::Frame</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1FrameNode.html">FrameNode [...]
-<tr id="row_112_227_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrame.html" target="_self">tvm::script::printer::MetadataFrame</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrameNode.html" title="MetadataFrame contains information like contant parameter array [...]
-<tr id="row_112_227_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1VarDefFrame.html" target="_self">tvm::script::printer::VarDefFrame</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1VarDefFrameNode.html" title="VarDefFrame contains information about the free variables that needs  [...]
-<tr id="row_112_228_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifier.html" target="_self">tvm::script::printer::IRDocsifier</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifierNode.html" title="IRDocsifier is the top-level interface in the IR-&gt;Doc process. ">IRD [...]
-<tr id="row_112_229_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1RootNodeContainer.html" target="_self">tvm::script::printer::RootNodeContainer</a></td><td class="desc"></td></tr>
-<tr id="row_112_230_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1VarTable.html" target="_self">tvm::script::printer::VarTable</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1VarTableNode.html" title="Variable Table manages mapping from variable object to ExprDoc during the proces [...]
-<tr id="row_112_231_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SourceName.html" target="_self">tvm::SourceName</a></td><td class="desc">The source name of a file span </td></tr>
-<tr id="row_112_232_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Span.html" target="_self">tvm::Span</a></td><td class="desc"></td></tr>
-<tr id="row_112_233_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Target.html" target="_self">tvm::Target</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetNode.html" title="Compilation target. ">TargetNode</a> </td></tr>
-<tr id="row_112_234_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKind.html" target="_self">tvm::TargetKind</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetKindNode.html" title="Target kind, specifies the kind of the target. ">TargetKindNode</a> </td></tr>
-<tr id="row_112_235_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTag.html" target="_self">tvm::TargetTag</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetTagNode.html" title="A target tag. ">TargetTagNode</a> </td></tr>
-<tr id="row_112_236_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1IterVarAttr.html" target="_self">tvm::te::IterVarAttr</a></td><td class="desc">Additional scheduable attributes about IterVar </td></tr>
-<tr id="row_112_237_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_237_" class="arrow" onclick="toggleFolder('112_237_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1IterVarRelation.html" target="_self">tvm::te::IterVarRelation</a></td><td class="desc">The schedule relation between IterVars can be <a class="el" href="classtvm_1_1te_1_1Split.html" title="M [...]
-<tr id="row_112_237_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Fuse.html" target="_self">tvm::te::Fuse</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1FuseNode.html" title="Fuse two domains into one domain. ">FuseNode</a> </td></tr>
-<tr id="row_112_237_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Rebase.html" target="_self">tvm::te::Rebase</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1RebaseNode.html" title="Rebase the iteration to make min to be 0. This is useful to normalize the Schedule to make every leaf...">RebaseNode</a> </td></tr>
-<tr id="row_112_237_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Singleton.html" target="_self">tvm::te::Singleton</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1SingletonNode.html" title="Singleton iterator [0, 1) ">SingletonNode</a> </td></tr>
-<tr id="row_112_237_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Split.html" target="_self">tvm::te::Split</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1SplitNode.html" title="Split the parent domain into product of outer and iter. ">SplitNode</a> </td></tr>
-<tr id="row_112_237_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Transform.html" target="_self">tvm::te::Transform</a></td><td class="desc"></td></tr>
-<tr id="row_112_238_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_238_" class="arrow" onclick="toggleFolder('112_238_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Operation.html" target="_self">tvm::te::Operation</a></td><td class="desc"><a class="el" href="classtvm_1_1te_1_1Operation.html" title="Operation that produces tensors. ">Operation</a> that p [...]
-<tr id="row_112_238_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1ComputeOp.html" target="_self">tvm::te::ComputeOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1ComputeOpNode.html" title="A Compute op that compute a tensor on certain domain. ">ComputeOpNode</a> </td></tr>
-<tr id="row_112_238_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1ExternOp.html" target="_self">tvm::te::ExternOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1ExternOpNode.html" title="External computation that cannot be splitted. ">ExternOpNode</a> </td></tr>
-<tr id="row_112_238_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1HybridOp.html" target="_self">tvm::te::HybridOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1HybridOpNode.html" title="A computation operator that generated by hybrid script. ">HybridOpNode</a> </td></tr>
-<tr id="row_112_238_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1PlaceholderOp.html" target="_self">tvm::te::PlaceholderOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1PlaceholderOpNode.html" title="A placeholder op represents an input placeholder. ">PlaceholderOpNode</a> </td></tr>
-<tr id="row_112_238_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1ScanOp.html" target="_self">tvm::te::ScanOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1ScanOpNode.html" title="Symbolic scan. ">ScanOpNode</a> </td></tr>
-<tr id="row_112_238_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1TensorComputeOp.html" target="_self">tvm::te::TensorComputeOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1TensorComputeOpNode.html" title="A TenorCompute op that compute a tensor with an tensor intrinsic. ">TensorComputeOpNode</a> </td></tr>
-<tr id="row_112_239_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Schedule.html" target="_self">tvm::te::Schedule</a></td><td class="desc">Global schedule container For operations and all the operations they depend on. The schedule per <a class="el" href="classtvm_1_1te_1_1Operation.html" title="Operation that produces tensors. ">Operation</a [...]
-<tr id="row_112_240_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1SpecializedCondition.html" target="_self">tvm::te::SpecializedCondition</a></td><td class="desc">Specialized condition to enable op specialization </td></tr>
-<tr id="row_112_241_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Stage.html" target="_self">tvm::te::Stage</a></td><td class="desc"><a class="el" href="classtvm_1_1te_1_1Stage.html" title="Stage, contains scheduling for a stage of computation. ">Stage</a>, contains scheduling for a stage of computation </td></tr>
-<tr id="row_112_242_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1TensorIntrin.html" target="_self">tvm::te::TensorIntrin</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1TensorIntrinNode.html" title="Node to represent a Tensor intrinsic operator. ">TensorIntrinNode</a> </td></tr>
-<tr id="row_112_243_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1TensorIntrinCall.html" target="_self">tvm::te::TensorIntrinCall</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1TensorIntrinCallNode.html">TensorIntrinCallNode</a> </td></tr>
-<tr id="row_112_244_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BijectiveLayout.html" target="_self">tvm::tir::BijectiveLayout</a></td><td class="desc">Bijective function mapping for data layout transformation. Given two <a class="el" href="classtvm_1_1tir_1_1Layout.html" title="Managed reference to LayoutNode. ">Layout</a>, <a class="el"  [...]
-<tr id="row_112_245_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BlockRV.html" target="_self">tvm::tir::BlockRV</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BlockRVNode.html" title="A random variable that evaluates to a TensorIR block. ">BlockRVNode</a> </td></tr>
-<tr id="row_112_246_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BlockScope.html" target="_self">tvm::tir::BlockScope</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BlockScopeNode.html" title="An object with 1-to-1 correspondence with each block reference in the sref tree. This data structure ...">Block [...]
-<tr id="row_112_247_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Buffer.html" target="_self">tvm::tir::Buffer</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1Buffer.html" title="Buffer is a symbolic n-darray structure. It is a composition of primitive symbolic types...">Buffer</a> is a symbolic n-darray structure. It is a co [...]
-<tr id="row_112_248_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferRegion.html" target="_self">tvm::tir::BufferRegion</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferRegionNode.html" title="Representing the region of multi-dimensional buffer access. ">BufferRegionNode</a> </td></tr>
-<tr id="row_112_249_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CommReducer.html" target="_self">tvm::tir::CommReducer</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1CommReducerNode.html" title="A commutative reducer node to represent a commutative binary operator with identity element...">CommReducerN [...]
-<tr id="row_112_250_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_250_" class="arrow" onclick="toggleFolder('112_250_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1DataProducer.html" target="_self">tvm::tir::DataProducer</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1DataProducerNode.html" title="Base node for data [...]
-<tr id="row_112_250_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Tensor.html" target="_self">tvm::te::Tensor</a></td><td class="desc"><a class="el" href="classtvm_1_1te_1_1Tensor.html" title="Tensor structure representing a possible input, or intermediate computation result. ">Tensor</a> structure representing a possible input, or intermed [...]
-<tr id="row_112_251_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Dependency.html" target="_self">tvm::tir::Dependency</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1DependencyNode.html" title="A tuple (src, dst, kind) representing certain types of dependency. For example, (A, B, kRAW) means block B depe [...]
-<tr id="row_112_252_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IndexMap.html" target="_self">tvm::tir::IndexMap</a></td><td class="desc"></td></tr>
-<tr id="row_112_253_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Instruction.html" target="_self">tvm::tir::Instruction</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1InstructionNode.html" title="Schedule instructions each corresponds to a schedule primitive. ">InstructionNode</a> </td></tr>
-<tr id="row_112_254_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1InstructionKind.html" target="_self">tvm::tir::InstructionKind</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1InstructionKindNode.html" title="Kind of an instruction, e.g. Split, Reorder, etc. Besides the name, every kind of instruction ha [...]
-<tr id="row_112_255_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IterVar.html" target="_self">tvm::tir::IterVar</a></td><td class="desc">Iteration Variable, represents an iteration over an integer interval </td></tr>
-<tr id="row_112_256_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Layout.html" target="_self">tvm::tir::Layout</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LayoutNode.html" title="Layout is to describe how data is organized within an N-dimention tensor. It is composed of upper cas...">LayoutNode</a> </ [...]
-<tr id="row_112_257_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LoopRV.html" target="_self">tvm::tir::LoopRV</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LoopRVNode.html" title="A random variable that evaluates to a TensorIR for loop. ">LoopRVNode</a> </td></tr>
-<tr id="row_112_258_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1MatchBufferRegion.html" target="_self">tvm::tir::MatchBufferRegion</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1MatchBufferRegionNode.html" title="Match introduces a constraint that the source buffer region can be remapped to the data la [...]
-<tr id="row_112_259_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Schedule.html" target="_self">tvm::tir::Schedule</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ScheduleNode.html" title="The user-facing schedule class. ">ScheduleNode</a> </td></tr>
-<tr id="row_112_260_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ScheduleState.html" target="_self">tvm::tir::ScheduleState</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ScheduleStateNode.html" title="The state of scheduling, which exposes a Replace method as the primary interface for all the scheduli. [...]
-<tr id="row_112_261_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_261_" class="arrow" onclick="toggleFolder('112_261_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Stmt.html" target="_self">tvm::tir::Stmt</a></td><td class="desc">Container of all statements </td></tr>
-<tr id="row_112_261_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Allocate.html" target="_self">tvm::tir::Allocate</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AllocateNode.html" title="Allocate a buffer that can be used in body. ">AllocateNode</a> </td></tr>
-<tr id="row_112_261_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1AllocateConst.html" target="_self">tvm::tir::AllocateConst</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AllocateConstNode.html" title="Allocate a buffer that can be used in body. ">AllocateConstNode</a> </td></tr>
-<tr id="row_112_261_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1AssertStmt.html" target="_self">tvm::tir::AssertStmt</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AssertStmtNode.html" title="Assert condition, if an error occurs, return the error message. ">AssertStmtNode</a> </td></tr>
-<tr id="row_112_261_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1AttrStmt.html" target="_self">tvm::tir::AttrStmt</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AttrStmtNode.html" title="Define certain auxiliary attribute for the body to be a symbolic value. This provide auxiliary inform...">AttrStmtN [...]
-<tr id="row_112_261_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Block.html" target="_self">tvm::tir::Block</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BlockNode.html" title="A block is a basic schedule unit in TIR. ">BlockNode</a> </td></tr>
-<tr id="row_112_261_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BlockRealize.html" target="_self">tvm::tir::BlockRealize</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BlockRealizeNode.html" title="A block realization node represents execution of the block at the binding values. ...">BlockRealizeNode [...]
-<tr id="row_112_261_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html" target="_self">tvm::tir::BufferRealize</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html" title="Annotate the region where the buffer need to be read and write in the body. We only need to allocate [...]
-<tr id="row_112_261_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferStore.html" target="_self">tvm::tir::BufferStore</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html" title="Store value to the high dimension buffer. ">BufferStoreNode</a> </td></tr>
-<tr id="row_112_261_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1DeclBuffer.html" target="_self">tvm::tir::DeclBuffer</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1DeclBufferNode.html" title="Declare a buffer that can be used in the body. ">DeclBufferNode</a> </td></tr>
-<tr id="row_112_261_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Evaluate.html" target="_self">tvm::tir::Evaluate</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1EvaluateNode.html" title="Evaluates an expression. This is mostly used for putting a Call node into Stmt. ">EvaluateNode</a> </td></tr>
-<tr id="row_112_261_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1For.html" target="_self">tvm::tir::For</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ForNode.html" title="A for loop, with poissible type annotations. ">ForNode</a> </td></tr>
-<tr id="row_112_261_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IfThenElse.html" target="_self">tvm::tir::IfThenElse</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1IfThenElseNode.html" title="IfThenElse statment. ">IfThenElseNode</a> </td></tr>
-<tr id="row_112_261_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LetStmt.html" target="_self">tvm::tir::LetStmt</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html" title="Let binding, bind var to value, then run body. ">LetStmtNode</a> </td></tr>
-<tr id="row_112_261_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Prefetch.html" target="_self">tvm::tir::Prefetch</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html" title="A prefetch hint for a buffer. ">PrefetchNode</a> </td></tr>
-<tr id="row_112_261_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ProducerRealize.html" target="_self">tvm::tir::ProducerRealize</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ProducerRealizeNode.html" title="Annotate the bounds where the data produced by the producer need to be written and read in bo [...]
-<tr id="row_112_261_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ProducerStore.html" target="_self">tvm::tir::ProducerStore</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ProducerStoreNode.html" title="Store value into mult-dimensional array that will be read by the consumer of the producer. ">Produc [...]
-<tr id="row_112_261_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SeqStmt.html" target="_self">tvm::tir::SeqStmt</a></td><td class="desc">Sequence statement </td></tr>
-<tr id="row_112_261_17_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Store.html" target="_self">tvm::tir::Store</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1StoreNode.html" title="Store value to the buffer. ">StoreNode</a> </td></tr>
-<tr id="row_112_261_18_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1While.html" target="_self">tvm::tir::While</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1WhileNode.html" title="A While loop. ">WhileNode</a> </td></tr>
-<tr id="row_112_262_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtSRef.html" target="_self">tvm::tir::StmtSRef</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1StmtSRefNode.html" title="An object that refers to schedulable elements (block/for-loop) in TensorIR, aka &quot;sref&quot;. ">StmtSRefNode</a>  [...]
-<tr id="row_112_263_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1TensorIntrin.html" target="_self">tvm::tir::TensorIntrin</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1TensorIntrinNode.html" title="Tensor intrinsics for tensorization. ">TensorIntrinNode</a> </td></tr>
-<tr id="row_112_264_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Trace.html" target="_self">tvm::tir::Trace</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1TraceNode.html" title="An execution trace of a scheduling program. ">TraceNode</a> </td></tr>
-<tr id="row_112_265_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1usmp_1_1AllocatedPoolInfo.html" target="_self">tvm::tir::usmp::AllocatedPoolInfo</a></td><td class="desc"></td></tr>
-<tr id="row_112_266_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1usmp_1_1BufferInfo.html" target="_self">tvm::tir::usmp::BufferInfo</a></td><td class="desc"></td></tr>
-<tr id="row_112_267_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1usmp_1_1BufferInfoAnalysis.html" target="_self">tvm::tir::usmp::BufferInfoAnalysis</a></td><td class="desc"></td></tr>
-<tr id="row_112_268_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1usmp_1_1PoolAllocation.html" target="_self">tvm::tir::usmp::PoolAllocation</a></td><td class="desc"></td></tr>
-<tr id="row_112_269_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_269_" class="arrow" onclick="toggleFolder('112_269_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1transform_1_1Pass.html" target="_self">tvm::transform::Pass</a></td><td class="desc"></td></tr>
-<tr id="row_112_269_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1transform_1_1Sequential.html" target="_self">tvm::transform::Sequential</a></td><td class="desc"></td></tr>
-<tr id="row_112_270_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1transform_1_1PassContext.html" target="_self">tvm::transform::PassContext</a></td><td class="desc"><a class="el" href="classtvm_1_1transform_1_1PassContext.html" title="PassContext that is used to configure the pass behavior. ">PassContext</a> that is used to configure the pass behav [...]
-<tr id="row_112_271_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1transform_1_1PassInfo.html" target="_self">tvm::transform::PassInfo</a></td><td class="desc">Managed reference class for <a class="el" href="classtvm_1_1transform_1_1PassInfoNode.html" title="Meta data that will be used to help optimization and analysis. ">PassInfoNode</a> </td></tr>
-<tr id="row_112_272_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_272_" class="arrow" onclick="toggleFolder('112_272_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Type.html" target="_self">tvm::Type</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeNode.html" title="Type is the base type of all types. ">TypeNode</a> </td></tr>
-<tr id="row_112_272_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1BaseTensorType.html" target="_self">tvm::BaseTensorType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1BaseTensorTypeNode.html" title="Base of all Tensor types This container can hold TensorType or GenericTensorType. ...">BaseTensorTypeNode</a> </td></tr>
-<tr id="row_112_272_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FuncType.html" target="_self">tvm::FuncType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1FuncTypeNode.html" title="Function type. ">FuncTypeNode</a> </td></tr>
-<tr id="row_112_272_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalTypeVar.html" target="_self">tvm::GlobalTypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalTypeVarNode.html" title="A global type variable that is used for defining new types or type aliases. ">GlobalTypeVarNode</a> </td></tr>
-<tr id="row_112_272_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IncompleteType.html" target="_self">tvm::IncompleteType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1IncompleteTypeNode.html" title="Intermediate values that is used to indicate incomplete type during type inference. ">IncompleteTypeNode</a> </td></tr>
-<tr id="row_112_272_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PointerType.html" target="_self">tvm::PointerType</a></td><td class="desc"></td></tr>
-<tr id="row_112_272_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimType.html" target="_self">tvm::PrimType</a></td><td class="desc"></td></tr>
-<tr id="row_112_272_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayRefType.html" target="_self">tvm::RelayRefType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayRefTypeNode.html" title="Reference Type High-level Relay IR. ">RelayRefTypeNode</a> </td></tr>
-<tr id="row_112_272_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorType.html" target="_self">tvm::TensorType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TensorTypeNode.html" title="This is the most commonly used type in relay. TensorType have a fixed dimension, data type...">TensorTypeNode</a> </td></tr>
-<tr id="row_112_272_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleType.html" target="_self">tvm::TupleType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TupleTypeNode.html" title="The type of tuple values. ">TupleTypeNode</a> </td></tr>
-<tr id="row_112_272_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeCall.html" target="_self">tvm::TypeCall</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeCallNode.html" title="Type function application. ">TypeCallNode</a> </td></tr>
-<tr id="row_112_272_10_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_272_10_" class="arrow" onclick="toggleFolder('112_272_10_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeConstraint.html" target="_self">tvm::TypeConstraint</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeConstraintNode.html" title="Potential Constraints  [...]
-<tr id="row_112_272_10_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeRelation.html" target="_self">tvm::TypeRelation</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeRelationNode.html" title="User defined type relation, it is an input-output relation on types. ">TypeRelationNode</a> </td></tr>
-<tr id="row_112_272_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeData.html" target="_self">tvm::TypeData</a></td><td class="desc">Stores all data for an Algebraic Data <a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> (ADT) </td></tr>
-<tr id="row_112_272_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVar.html" target="_self">tvm::TypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeVarNode.html" title="Type parameter in functions. ">TypeVarNode</a> </td></tr>
-<tr id="row_112_273_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html" target="_self">tvm::TypedEnvFunc&lt; R(Args...)&gt;</a></td><td class="desc">A typed version of <a class="el" href="classtvm_1_1EnvFunc.html" title="Managed reference to EnvFuncNode. ">EnvFunc</a>. It is backed by a GlobalFuncNode internally [...]
-<tr id="row_112_274_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeReporter.html" target="_self">tvm::TypeReporter</a></td><td class="desc">Container class of <a class="el" href="classtvm_1_1TypeReporter.html" title="Container class of TypeReporter. ">TypeReporter</a> </td></tr>
-<tr id="row_112_275_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDevice.html" target="_self">tvm::VirtualDevice</a></td><td class="desc">Managed reference class to <code><a class="el" href="classtvm_1_1VirtualDeviceNode.html" title="Describes at compile time the constraints on where data is to be stored at runtime down to the (virtu...">Vir [...]
-<tr id="row_112_276_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspaceMemoryPools.html" target="_self">tvm::WorkspaceMemoryPools</a></td><td class="desc"></td></tr>
+<tr id="row_112_85_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::DiagnosticContext &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_86_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::FloatImm &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_87_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::Integer &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_88_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::IRModule &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_89_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::meta_schedule::CostModel &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_90_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::meta_schedule::Database &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_91_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::meta_schedule::SearchStrategy &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_92_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::meta_schedule::SpaceGenerator &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_93_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::PrimExpr &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_94_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::auto_scheduler::MeasureCallback &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_95_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::FloatImm &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_96_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::Integer &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_97_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::ArgInfo &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_98_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::BuilderResult &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_99_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::MeasureCandidate &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_100_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::Postproc &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_101_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::RunnerFuture &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_102_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::meta_schedule::ScheduleRule &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_103_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::tir::BufferRegion &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_104_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Array&lt; tvm::tir::Stmt &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_105_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Map&lt; tvm::meta_schedule::Mutator, tvm::FloatImm &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_106_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Map&lt; tvm::runtime::String, tvm::runtime::NDArray &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_107_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::Map&lt; tvm::runtime::String, tvm::runtime::ObjectRef &gt; &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_108_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::NDArray &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_109_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::ObjectRef &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_110_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::runtime::String &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_111_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::script::printer::ExprDoc &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_112_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::Target &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_113_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::tir::IterVar &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_114_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::tir::Stmt &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_115_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; tvm::Type &gt;</a></td><td class="desc"></td></tr>
+<tr id="row_112_116_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_116_" class="arrow" onclick="toggleFolder('112_116_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1AffineType.html" target="_self">tvm::AffineType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1AffineTypeNode.html" title="AffineType representation. ">AffineTypeNode [...]
+<tr id="row_112_116_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorAffineType.html" target="_self">tvm::TensorAffineType</a></td><td class="desc">Managed reference to AffineTypes </td></tr>
+<tr id="row_112_116_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleAffineType.html" target="_self">tvm::TupleAffineType</a></td><td class="desc">Managed reference to TupleAffineTypes </td></tr>
+<tr id="row_112_117_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1ConstIntBound.html" target="_self">tvm::arith::ConstIntBound</a></td><td class="desc">Reference class to <a class="el" href="classtvm_1_1arith_1_1ConstIntBoundNode.html" title="Constant integer up and lower bound(inclusive). Useful for value bound analysis. ">ConstIntBoundNo [...]
+<tr id="row_112_118_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IntConstraints.html" target="_self">tvm::arith::IntConstraints</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IntConstraintsNode.html" title="Represent integer constrains including (integer) variables, their ranges and the relations be [...]
+<tr id="row_112_119_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransform.html" target="_self">tvm::arith::IntConstraintsTransform</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransformNode.html" title="We can have different set of variables to represent the same const [...]
+<tr id="row_112_120_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IntGroupBounds.html" target="_self">tvm::arith::IntGroupBounds</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IntGroupBoundsNode.html" title="Represent integer grouped bounds which are classified into lower bounds (inclusive), upper bo [...]
+<tr id="row_112_121_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IntSet.html" target="_self">tvm::arith::IntSet</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IntSetNode.html" title="Base class of all Integer set containers. represent a set of integers in one dimension. ">IntSetNode</a> </td></tr>
+<tr id="row_112_122_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IterMapResult.html" target="_self">tvm::arith::IterMapResult</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IterMapResultNode.html" title="Result of DetectIterMap. ">IterMapResultNode</a> </td></tr>
+<tr id="row_112_123_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IterMark.html" target="_self">tvm::arith::IterMark</a></td><td class="desc">Managed reference to IterMarkExprNode </td></tr>
+<tr id="row_112_124_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1ModularSet.html" target="_self">tvm::arith::ModularSet</a></td><td class="desc">Reference of <a class="el" href="classtvm_1_1arith_1_1ModularSetNode.html" title="Range of a linear integer function. Use to do specify the possible index values. ">ModularSetNode</a> </td></tr>
+<tr id="row_112_125_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1AttrFieldInfo.html" target="_self">tvm::AttrFieldInfo</a></td><td class="desc"><a class="el" href="classtvm_1_1AttrFieldInfo.html" title="AttrFieldInfo. ">AttrFieldInfo</a> </td></tr>
+<tr id="row_112_126_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_126_" class="arrow" onclick="toggleFolder('112_126_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Attrs.html" target="_self">tvm::Attrs</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1BaseAttrsNode.html" title="Base class of all attribute class. ">BaseAttrsNode</a> [...]
+<tr id="row_112_126_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DictAttrs.html" target="_self">tvm::DictAttrs</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1DictAttrsNode.html" title="Specialized attribute type that is backed by a map. The DictAttrsNode implements the Attrs behavior...">DictAttrsNode</a> </td></tr>
+<tr id="row_112_127_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1AccessAnalyzer.html" target="_self">tvm::auto_scheduler::AccessAnalyzer</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1AccessAnalyzerNode.html" title="Static analyzer for a ComputeDAG. ">AccessAnalyzerNode</a> </td></tr>
+<tr id="row_112_128_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMap.html" target="_self">tvm::auto_scheduler::AttachMap</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMapNode.html" title="stores the compute_at relation between stages This stores a bi-directional mapp [...]
+<tr id="row_112_129_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1BuildResult.html" target="_self">tvm::auto_scheduler::BuildResult</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1BuildResultNode.html" title="Store the result of a build. ">BuildResultNode</a> </td></tr>
+<tr id="row_112_130_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAG.html" target="_self">tvm::auto_scheduler::ComputeDAG</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAGNode.html" title="The auto-scheduler&#39;s computational graph and related program analyses. " [...]
+<tr id="row_112_131_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_131_" class="arrow" onclick="toggleFolder('112_131_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1CostModel.html" target="_self">tvm::auto_scheduler::CostModel</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1CostModelNode.html" [...]
+<tr id="row_112_131_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1PythonBasedModel.html" target="_self">tvm::auto_scheduler::PythonBasedModel</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1PythonBasedModelNode.html" title="A wrapper for cost model defined by python code This cla [...]
+<tr id="row_112_131_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1RandomModel.html" target="_self">tvm::auto_scheduler::RandomModel</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1RandomModelNode.html" title="The cost model returning random value for all predictions. ">RandomMode [...]
+<tr id="row_112_132_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1HardwareParams.html" target="_self">tvm::auto_scheduler::HardwareParams</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1HardwareParamsNode.html" title="The parameters of target hardware used to guide the SearchPolicy [...]
+<tr id="row_112_133_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1Iterator.html" target="_self">tvm::auto_scheduler::Iterator</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1IteratorNode.html" title="An iterator of a for-loop Similar to tvm::IterVar in include/tvm/tir/expr.h ">Iter [...]
+<tr id="row_112_134_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_134_" class="arrow" onclick="toggleFolder('112_134_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureCallback.html" target="_self">tvm::auto_scheduler::MeasureCallback</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1Measure [...]
+<tr id="row_112_134_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1PythonBasedMeasureCallback.html" target="_self">tvm::auto_scheduler::PythonBasedMeasureCallback</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1PythonBasedMeasureCallbackNode.html" title="A wrapper for measure call [...]
+<tr id="row_112_134_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1RecordToFile.html" target="_self">tvm::auto_scheduler::RecordToFile</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1RecordToFileNode.html" title="Callback for logging the input and results of measurements to file.  [...]
+<tr id="row_112_135_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureInput.html" target="_self">tvm::auto_scheduler::MeasureInput</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureInputNode.html" title="Store the input of a measurement. ">MeasureInputNode</a> </td></tr>
+<tr id="row_112_136_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureResult.html" target="_self">tvm::auto_scheduler::MeasureResult</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureResultNode.html" title="Store the results of a measurement. ">MeasureResultNode</a> </td></tr>
+<tr id="row_112_137_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_137_" class="arrow" onclick="toggleFolder('112_137_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramBuilder.html" target="_self">tvm::auto_scheduler::ProgramBuilder</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramBu [...]
+<tr id="row_112_137_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilder.html" target="_self">tvm::auto_scheduler::LocalBuilder</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilderNode.html" title="LocalBuilder use local CPU cores to build programs in parallel. ">Lo [...]
+<tr id="row_112_138_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramMeasurer.html" target="_self">tvm::auto_scheduler::ProgramMeasurer</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramMeasurerNode.html" title="Measurer that measures the time costs of tvm programs This cl [...]
+<tr id="row_112_139_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_139_" class="arrow" onclick="toggleFolder('112_139_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramRunner.html" target="_self">tvm::auto_scheduler::ProgramRunner</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramRunn [...]
+<tr id="row_112_139_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalRunner.html" target="_self">tvm::auto_scheduler::LocalRunner</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1LocalRunnerNode.html" title="LocalRunner that uses local CPU/GPU to measure the time cost of program [...]
+<tr id="row_112_139_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1RPCRunner.html" target="_self">tvm::auto_scheduler::RPCRunner</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1RPCRunnerNode.html" title="RPCRunner that uses RPC call to measures the time cost of programs on remote  [...]
+<tr id="row_112_140_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1RecordReader.html" target="_self">tvm::auto_scheduler::RecordReader</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1RecordReaderNode.html" title="Log reader to load step logs from a file. ">RecordReaderNode</a> </td></tr>
+<tr id="row_112_141_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_141_" class="arrow" onclick="toggleFolder('112_141_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchCallback.html" target="_self">tvm::auto_scheduler::SearchCallback</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1SearchCal [...]
+<tr id="row_112_141_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1PreloadMeasuredStates.html" target="_self">tvm::auto_scheduler::PreloadMeasuredStates</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1PreloadMeasuredStatesNode.html" title="Preload measured states from a log file.  [...]
+<tr id="row_112_142_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchPolicy.html" target="_self">tvm::auto_scheduler::SearchPolicy</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1SearchPolicyNode.html" title="The base class of search policies. ">SearchPolicyNode</a> </td></tr>
+<tr id="row_112_143_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchTask.html" target="_self">tvm::auto_scheduler::SearchTask</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1SearchTaskNode.html" title="The computation information and hardware parameters for a specific schedule  [...]
+<tr id="row_112_144_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1Stage.html" target="_self">tvm::auto_scheduler::Stage</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1StageNode.html" title="A op stage in the compute declaration. Similar to te::Stage in include/tvm/te/schedule.h. " [...]
+<tr id="row_112_145_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1State.html" target="_self">tvm::auto_scheduler::State</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1StateNode.html" title="A state in the search process. It consists of the current loop structure and a list of tran [...]
+<tr id="row_112_146_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_146_" class="arrow" onclick="toggleFolder('112_146_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1Step.html" target="_self">tvm::auto_scheduler::Step</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1StepNode.html" title="The bas [...]
+<tr id="row_112_146_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1AnnotationStep.html" target="_self">tvm::auto_scheduler::AnnotationStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1AnnotationStepNode.html" title="Annotation step that corresponds to vectorize, parallel, unrol [...]
+<tr id="row_112_146_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheReadStep.html" target="_self">tvm::auto_scheduler::CacheReadStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1CacheReadStepNode.html" title="Cache read step that corresponds to te::Schedule::cache_read. ">C [...]
+<tr id="row_112_146_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheWriteStep.html" target="_self">tvm::auto_scheduler::CacheWriteStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1CacheWriteStepNode.html" title="Cache write step that corresponds to te::Schedule::cache_write [...]
+<tr id="row_112_146_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStep.html" target="_self">tvm::auto_scheduler::ComputeAtStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html" title="Compute at step that corresponds to te::Stage::compute_at. ">Comp [...]
+<tr id="row_112_146_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStep.html" target="_self">tvm::auto_scheduler::ComputeInlineStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStepNode.html" title="Compute inline step that corresponds to te::Stage::co [...]
+<tr id="row_112_146_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeRootStep.html" target="_self">tvm::auto_scheduler::ComputeRootStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeRootStepNode.html" title="Compute root step that corresponds to te::Stage::compute_ro [...]
+<tr id="row_112_146_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStep.html" target="_self">tvm::auto_scheduler::FollowFusedSplitStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStepNode.html" title="Similar to FollowSplitStep, but uses split f [...]
+<tr id="row_112_146_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowSplitStep.html" target="_self">tvm::auto_scheduler::FollowSplitStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1FollowSplitStepNode.html" title="Similar to SplitStepNode, but uses split factors from anoth [...]
+<tr id="row_112_146_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStep.html" target="_self">tvm::auto_scheduler::FuseStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStepNode.html" title="Fuse step that corresponds to te::Stage::fuse. ">FuseStepNode</a> </td></tr>
+<tr id="row_112_146_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1PragmaStep.html" target="_self">tvm::auto_scheduler::PragmaStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1PragmaStepNode.html" title="Pragma step that corresponds to te::Stage::pragma. ">PragmaStepNode</a> </ [...]
+<tr id="row_112_146_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1ReorderStep.html" target="_self">tvm::auto_scheduler::ReorderStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1ReorderStepNode.html" title="Reorder step that corresponds to te::Stage::reorder. ">ReorderStepNode [...]
+<tr id="row_112_146_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1RfactorStep.html" target="_self">tvm::auto_scheduler::RfactorStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1RfactorStepNode.html" title="Reduction factor step that corresponds to te::Schedule::rfactor. ">Rfa [...]
+<tr id="row_112_146_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1SplitStep.html" target="_self">tvm::auto_scheduler::SplitStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1SplitStepNode.html" title="Split step that corresponds to te::Stage::split with additional support of m [...]
+<tr id="row_112_146_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1StorageAlignStep.html" target="_self">tvm::auto_scheduler::StorageAlignStep</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1StorageAlignStepNode.html" title="Storage align step that corresponds to te::Stage::stora [...]
+<tr id="row_112_147_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1auto__scheduler_1_1TuningOptions.html" target="_self">tvm::auto_scheduler::TuningOptions</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1auto__scheduler_1_1TuningOptionsNode.html" title="Tuning and measurement options. ">TuningOptionsNode</a> </td></tr>
+<tr id="row_112_148_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_148_" class="arrow" onclick="toggleFolder('112_148_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1BaseExpr.html" target="_self">tvm::BaseExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1BaseExprNode.html" title="Base type of all the expressions. ">BaseExprNode< [...]
+<tr id="row_112_148_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_148_0_" class="arrow" onclick="toggleFolder('112_148_0_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimExpr.html" target="_self">tvm::PrimExpr</a></td><td class="desc">Reference to <a class="el" href="classtvm_1_1PrimExprNode.html" title="Base node of all primitive expressions. ">PrimExprN [...]
+<tr id="row_112_148_0_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_112_148_0_0_" class="arrow" onclick="toggleFolder('112_148_0_0_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IterMapExpr.html" target="_self">tvm::arith::IterMapExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IterMapExprNode.html" title="Bas [...]
+<tr id="row_112_148_0_0_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IterSplitExpr.html" target="_self">tvm::arith::IterSplitExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IterSplitExprNode.html" title="Split of an iterator. ">IterSplitExprNode</a> </td></tr>
+<tr id="row_112_148_0_0_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1IterSumExpr.html" target="_self">tvm::arith::IterSumExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1arith_1_1IterSumExprNode.html" title="Fuse multiple iterators by summing them with scaling. ">IterSumExprNode</a> </td></tr>
+<tr id="row_112_148_0_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FloatImm.html" target="_self">tvm::FloatImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1FloatImmNode.html" title="Constant floating point literals in the program. ">FloatImmNode</a> </td></tr>
+<tr id="row_112_148_0_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_112_148_0_2_" class="arrow" onclick="toggleFolder('112_148_0_2_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IntImm.html" target="_self">tvm::IntImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IntImmNode.html" title="Constant integer literals in the prog [...]
+<tr id="row_112_148_0_2_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Bool.html" target="_self">tvm::Bool</a></td><td class="desc">Boolean constant </td></tr>
+<tr id="row_112_148_0_2_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Integer.html" target="_self">tvm::Integer</a></td><td class="desc">Container of constant int that adds more constructors </td></tr>
+<tr id="row_112_148_0_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Add.html" target="_self">tvm::tir::Add</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AddNode.html" title="a + b ">AddNode</a> </td></tr>
+<tr id="row_112_148_0_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1And.html" target="_self">tvm::tir::And</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AndNode.html" title="a &amp;&amp; b ">AndNode</a> </td></tr>
+<tr id="row_112_148_0_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Any.html" target="_self">tvm::tir::Any</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AnyNode.html" title="Any shape. ">AnyNode</a> </td></tr>
+<tr id="row_112_148_0_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Broadcast.html" target="_self">tvm::tir::Broadcast</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BroadcastNode.html" title="Create a vector where all the elements are value. ">BroadcastNode</a> </td></tr>
+<tr id="row_112_148_0_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferLoad.html" target="_self">tvm::tir::BufferLoad</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferLoadNode.html" title="Load value from the high dimension buffer. ">BufferLoadNode</a> </td></tr>
+<tr id="row_112_148_0_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Call.html" target="_self">tvm::tir::Call</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1CallNode.html" title="Call node. ">CallNode</a> </td></tr>
+<tr id="row_112_148_0_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Cast.html" target="_self">tvm::tir::Cast</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1CastNode.html" title="Cast value from one data type to another. ">CastNode</a> </td></tr>
+<tr id="row_112_148_0_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Div.html" target="_self">tvm::tir::Div</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1DivNode.html" title="a / b in the C semnatics. ">DivNode</a> </td></tr>
+<tr id="row_112_148_0_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1EQ.html" target="_self">tvm::tir::EQ</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1EQNode.html" title="a == b ">EQNode</a> </td></tr>
+<tr id="row_112_148_0_12_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FloorDiv.html" target="_self">tvm::tir::FloorDiv</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1FloorDivNode.html" title="Floor division, floor(a/b) ">FloorDivNode</a> </td></tr>
+<tr id="row_112_148_0_13_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1FloorMod.html" target="_self">tvm::tir::FloorMod</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1FloorModNode.html" title="The remainder of the floordiv. ">FloorModNode</a> </td></tr>
+<tr id="row_112_148_0_14_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1GE.html" target="_self">tvm::tir::GE</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1GENode.html" title="a &gt;= b ">GENode</a> </td></tr>
+<tr id="row_112_148_0_15_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1GT.html" target="_self">tvm::tir::GT</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1GTNode.html" title="a &gt; b ">GTNode</a> </td></tr>
+<tr id="row_112_148_0_16_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LE.html" target="_self">tvm::tir::LE</a></td><td class="desc">Managed reference to <a class="el" href="structtvm_1_1tir_1_1LENode.html" title="a &lt;= b ">LENode</a> </td></tr>
+<tr id="row_112_148_0_17_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Let.html" target="_self">tvm::tir::Let</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LetNode.html" title="Let binding. Bind var to value then evaluate body. ">LetNode</a> </td></tr>
+<tr id="row_112_148_0_18_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Load.html" target="_self">tvm::tir::Load</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LoadNode.html" title="Load the value from buffer_var. ">LoadNode</a> </td></tr>
+<tr id="row_112_148_0_19_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LT.html" target="_self">tvm::tir::LT</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LTNode.html" title="a &lt; b ">LTNode</a> </td></tr>
+<tr id="row_112_148_0_20_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Max.html" target="_self">tvm::tir::Max</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1MaxNode.html" title="max(a, b) ">MaxNode</a> </td></tr>
+<tr id="row_112_148_0_21_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Min.html" target="_self">tvm::tir::Min</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1MinNode.html" title="min(a, b) ">MinNode</a> </td></tr>
+<tr id="row_112_148_0_22_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Mod.html" target="_self">tvm::tir::Mod</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ModNode.html" title="a % b in the C semnatics. ">ModNode</a> </td></tr>
+<tr id="row_112_148_0_23_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Mul.html" target="_self">tvm::tir::Mul</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1MulNode.html" title="a * b ">MulNode</a> </td></tr>
+<tr id="row_112_148_0_24_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1NE.html" target="_self">tvm::tir::NE</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1NENode.html" title="a != b ">NENode</a> </td></tr>
+<tr id="row_112_148_0_25_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Not.html" target="_self">tvm::tir::Not</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1NotNode.html" title="!a ">NotNode</a> </td></tr>
+<tr id="row_112_148_0_26_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Or.html" target="_self">tvm::tir::Or</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1OrNode.html" title="a || b ">OrNode</a> </td></tr>
+<tr id="row_112_148_0_27_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ProducerLoad.html" target="_self">tvm::tir::ProducerLoad</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ProducerLoadNode.html" title="Load value from the result produced by the producer. ">ProducerLoadNode</a> </td></tr>
+<tr id="row_112_148_0_28_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Ramp.html" target="_self">tvm::tir::Ramp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1RampNode.html" title="Construct a vector with lanes elements where its i-th element equals base + i * stride. This is useful to construct a index  [...]
+<tr id="row_112_148_0_29_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Reduce.html" target="_self">tvm::tir::Reduce</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ReduceNode.html" title="Reduction operator operator. ">ReduceNode</a> </td></tr>
+<tr id="row_112_148_0_30_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Select.html" target="_self">tvm::tir::Select</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1SelectNode.html" title="return true_value if condition is true, otherwise return false_value. ">SelectNode</a> </td></tr>
+<tr id="row_112_148_0_31_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Shuffle.html" target="_self">tvm::tir::Shuffle</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ShuffleNode.html" title="Shuffle instruction. vec = concat(vectors) result = (vec[indices[0]], vec[indices[1]] ...">ShuffleNode</a> </td></tr>
+<tr id="row_112_148_0_32_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StringImm.html" target="_self">tvm::tir::StringImm</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1StringImmNode.html" title="String constants, only used in asserts. ">StringImmNode</a> </td></tr>
+<tr id="row_112_148_0_33_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Sub.html" target="_self">tvm::tir::Sub</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1SubNode.html" title="a - b ">SubNode</a> </td></tr>
+<tr id="row_112_148_0_34_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_112_148_0_34_" class="arrow" onclick="toggleFolder('112_148_0_34_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Var.html" target="_self">tvm::tir::Var</a></td><td class="desc">Named variable in TIR </td></tr>
+<tr id="row_112_148_0_34_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SizeVar.html" target="_self">tvm::tir::SizeVar</a></td><td class="desc">Named variable represents a tensor index size </td></tr>
+<tr id="row_112_148_1_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_148_1_" class="arrow" onclick="toggleFolder('112_148_1_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayExpr.html" target="_self">tvm::RelayExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayExprNode.html" title="Base node of all non-primitive expressio [...]
+<tr id="row_112_148_1_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_112_148_1_0_" class="arrow" onclick="toggleFolder('112_148_1_0_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1BaseFunc.html" target="_self">tvm::BaseFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1BaseFuncNode.html" title="Base node of all functions. ">BaseFun [...]
+<tr id="row_112_148_1_0_0_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Function.html" target="_self">tvm::relay::Function</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" title="Relay Function container. ">FunctionNode</a> </td></tr>
+<tr id="row_112_148_1_0_1_" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1PrimFunc.html" target="_self">tvm::tir::PrimFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1PrimFuncNode.html" title="Primitive functions that contains TIR statements. ">PrimFuncNode</a> </td></tr>
+<tr id="row_112_148_1_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Constructor.html" target="_self">tvm::Constructor</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1ConstructorNode.html" title="ADT constructor. Constructors compare by pointer equality. ">ConstructorNode</a> </td></tr>
+<tr id="row_112_148_1_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVar.html" target="_self">tvm::GlobalVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalVarNode.html" title="Global variable that lives in the top-level module. ">GlobalVarNode</a> </td></tr>
+<tr id="row_112_148_1_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Op.html" target="_self">tvm::Op</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1OpNode.html" title="Primitive Op(builtin intrinsics) ">OpNode</a> </td></tr>
+<tr id="row_112_148_1_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Call.html" target="_self">tvm::relay::Call</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Constant.html" target="_self">tvm::relay::Constant</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1If.html" target="_self">tvm::relay::If</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Let.html" target="_self">tvm::relay::Let</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Match.html" target="_self">tvm::relay::Match</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefCreate.html" target="_self">tvm::relay::RefCreate</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefRead.html" target="_self">tvm::relay::RefRead</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefWrite.html" target="_self">tvm::relay::RefWrite</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_12_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TempExpr.html" target="_self">tvm::relay::TempExpr</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_13_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Tuple.html" target="_self">tvm::relay::Tuple</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_14_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItem.html" target="_self">tvm::relay::TupleGetItem</a></td><td class="desc"></td></tr>
+<tr id="row_112_148_1_15_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Var.html" target="_self">tvm::relay::Var</a></td><td class="desc"></td></tr>
+<tr id="row_112_149_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1CompilationConfig.html" target="_self">tvm::CompilationConfig</a></td><td class="desc">Managed reference class to <code><a class="el" href="classtvm_1_1CompilationConfig.html" title="Managed reference class to CompilationConfig. ">CompilationConfig</a></code> </td></tr>
+<tr id="row_112_150_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantInfo.html" target="_self">tvm::ConstantInfo</a></td><td class="desc"></td></tr>
+<tr id="row_112_151_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantMemoryPools.html" target="_self">tvm::ConstantMemoryPools</a></td><td class="desc"></td></tr>
+<tr id="row_112_152_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Diagnostic.html" target="_self">tvm::Diagnostic</a></td><td class="desc"></td></tr>
+<tr id="row_112_153_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticContext.html" target="_self">tvm::DiagnosticContext</a></td><td class="desc"></td></tr>
+<tr id="row_112_154_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticRenderer.html" target="_self">tvm::DiagnosticRenderer</a></td><td class="desc"></td></tr>
+<tr id="row_112_155_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1EnvFunc.html" target="_self">tvm::EnvFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1EnvFuncNode.html" title="A serializable function backed by TVM&#39;s global environment. ">EnvFuncNode</a> </td></tr>
+<tr id="row_112_156_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GenericFunc.html" target="_self">tvm::GenericFunc</a></td><td class="desc">Generic function that can be specialized on a per-target basis </td></tr>
+<tr id="row_112_157_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarSupply.html" target="_self">tvm::GlobalVarSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1GlobalVarSupplyNode.html" title="GlobalVarSupply can be used to generate unique GlobalVars. ">GlobalVarSupplyNode</a> </td></tr>
+<tr id="row_112_158_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1instrument_1_1PassInstrument.html" target="_self">tvm::instrument::PassInstrument</a></td><td class="desc">Managed reference class for <a class="el" href="classtvm_1_1instrument_1_1PassInstrumentNode.html" title="PassInstrumentNode forms an instrument implementation. It provides API  [...]
+<tr id="row_112_159_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IRModule.html" target="_self">tvm::IRModule</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IRModuleNode.html" title="IRModule that holds functions and type definitions. ">IRModuleNode</a> </td></tr>
+<tr id="row_112_160_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MemoryInfo.html" target="_self">tvm::MemoryInfo</a></td><td class="desc">Defines memory info </td></tr>
+<tr id="row_112_161_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_161_" class="arrow" onclick="toggleFolder('112_161_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1ArgInfo.html" target="_self">tvm::meta_schedule::ArgInfo</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1ArgInfoNode.html" title="T [...]
+<tr id="row_112_161_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1TensorInfo.html" target="_self">tvm::meta_schedule::TensorInfo</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1TensorInfoNode.html" title="The tensor argument information. ">TensorInfoNode</a> </td></tr>
+<tr id="row_112_162_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Builder.html" target="_self">tvm::meta_schedule::Builder</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1BuilderNode.html" title="The abstract builder interface. ">BuilderNode</a> </td></tr>
+<tr id="row_112_163_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInput.html" target="_self">tvm::meta_schedule::BuilderInput</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInputNode.html" title="The builder&#39;s input, containing an IRModule and the target. ">Builder [...]
+<tr id="row_112_164_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderResult.html" target="_self">tvm::meta_schedule::BuilderResult</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1BuilderResultNode.html" title="The builder&#39;s output, containing the artifact path or error messag [...]
+<tr id="row_112_165_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1CostModel.html" target="_self">tvm::meta_schedule::CostModel</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1CostModelNode.html" title="Cost model. ">CostModelNode</a> </td></tr>
+<tr id="row_112_166_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Database.html" target="_self">tvm::meta_schedule::Database</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1DatabaseNode.html">DatabaseNode</a> </td></tr>
+<tr id="row_112_167_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1ExtractedTask.html" target="_self">tvm::meta_schedule::ExtractedTask</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1ExtractedTaskNode.html" title="A tuning task extracted from the high-level IR. ">ExtractedTaskNode</a [...]
+<tr id="row_112_168_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1FeatureExtractor.html" target="_self">tvm::meta_schedule::FeatureExtractor</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1FeatureExtractorNode.html" title="Extractor for features from measure candidates for use in cos [...]
+<tr id="row_112_169_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCallback.html" target="_self">tvm::meta_schedule::MeasureCallback</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCallbackNode.html" title="Rules to apply after measure results is available. ">MeasureCall [...]
+<tr id="row_112_170_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCandidate.html" target="_self">tvm::meta_schedule::MeasureCandidate</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCandidateNode.html" title="The schedule (with input shapes) to be measured. ">MeasureCan [...]
+<tr id="row_112_171_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Mutator.html" target="_self">tvm::meta_schedule::Mutator</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1MutatorNode.html" title="Mutator is designed to mutate the trace to explore the design space. ">MutatorNode</a> < [...]
+<tr id="row_112_172_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Postproc.html" target="_self">tvm::meta_schedule::Postproc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1PostprocNode.html" title="Rules to apply a postprocessor to a schedule. ">PostprocNode</a> </td></tr>
+<tr id="row_112_173_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Profiler.html" target="_self">tvm::meta_schedule::Profiler</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1ProfilerNode.html" title="A generic profiler. ">ProfilerNode</a> </td></tr>
+<tr id="row_112_174_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Runner.html" target="_self">tvm::meta_schedule::Runner</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1RunnerNode.html" title="The abstract runner interface. ">RunnerNode</a> </td></tr>
+<tr id="row_112_175_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1RunnerFuture.html" target="_self">tvm::meta_schedule::RunnerFuture</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1RunnerFutureNode.html" title="A class to asynchronously fetch runner&#39;s output. ">RunnerFutureNode</ [...]
+<tr id="row_112_176_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1RunnerInput.html" target="_self">tvm::meta_schedule::RunnerInput</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1RunnerInputNode.html" title="Runner&#39;s input containing path of artifact, type of device and argument  [...]
+<tr id="row_112_177_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1RunnerResult.html" target="_self">tvm::meta_schedule::RunnerResult</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1RunnerResultNode.html" title="Runner&#39;s output containing measurement result of MeasureCandidate or  [...]
+<tr id="row_112_178_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1ScheduleRule.html" target="_self">tvm::meta_schedule::ScheduleRule</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1ScheduleRuleNode.html" title="Rules to modify a block in a schedule. ">ScheduleRuleNode</a> </td></tr>
+<tr id="row_112_179_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1SearchStrategy.html" target="_self">tvm::meta_schedule::SearchStrategy</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1SearchStrategyNode.html" title="The search strategy for measure candidates generation. ">SearchStra [...]
+<tr id="row_112_180_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1SpaceGenerator.html" target="_self">tvm::meta_schedule::SpaceGenerator</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1SpaceGeneratorNode.html" title="The abstract class for design space generation. ">SpaceGeneratorNod [...]
+<tr id="row_112_181_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1TaskRecord.html" target="_self">tvm::meta_schedule::TaskRecord</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1TaskRecordNode.html">TaskRecordNode</a> </td></tr>
+<tr id="row_112_182_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1TaskScheduler.html" target="_self">tvm::meta_schedule::TaskScheduler</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1TaskSchedulerNode.html" title="The abstract interface of task schedulers. ">TaskSchedulerNode</a> </td></tr>
+<tr id="row_112_183_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1TuneContext.html" target="_self">tvm::meta_schedule::TuneContext</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1TuneContextNode.html" title="The auto tuning context. ">TuneContextNode</a> </td></tr>
+<tr id="row_112_184_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1TuningRecord.html" target="_self">tvm::meta_schedule::TuningRecord</a></td><td class="desc">The managed reference of <a class="el" href="classtvm_1_1meta__schedule_1_1TuningRecordNode.html" title="The class of tuning records. ">TuningRecordNode</a> </td></tr>
+<tr id="row_112_185_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1meta__schedule_1_1Workload.html" target="_self">tvm::meta_schedule::Workload</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1meta__schedule_1_1WorkloadNode.html" title="A workload, i.e. an IRModule and its structural hash. ">WorkloadNode</a> </td></tr>
+<tr id="row_112_186_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NameSupply.html" target="_self">tvm::NameSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1NameSupplyNode.html" title="NameSupply can be used to generate unique names. ">NameSupplyNode</a> </td></tr>
+<tr id="row_112_187_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_187_" class="arrow" onclick="toggleFolder('112_187_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPath.html" target="_self">tvm::ObjectPath</a></td><td class="desc"></td></tr>
+<tr id="row_112_187_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ArrayIndexPath.html" target="_self">tvm::ArrayIndexPath</a></td><td class="desc"></td></tr>
+<tr id="row_112_187_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1AttributeAccessPath.html" target="_self">tvm::AttributeAccessPath</a></td><td class="desc"></td></tr>
+<tr id="row_112_187_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MapValuePath.html" target="_self">tvm::MapValuePath</a></td><td class="desc"></td></tr>
+<tr id="row_112_187_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingArrayElementPath.html" target="_self">tvm::MissingArrayElementPath</a></td><td class="desc"></td></tr>
+<tr id="row_112_187_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingMapEntryPath.html" target="_self">tvm::MissingMapEntryPath</a></td><td class="desc"></td></tr>
+<tr id="row_112_187_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RootPath.html" target="_self">tvm::RootPath</a></td><td class="desc"></td></tr>
+<tr id="row_112_187_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1UnknownAttributeAccessPath.html" target="_self">tvm::UnknownAttributeAccessPath</a></td><td class="desc"></td></tr>
+<tr id="row_112_188_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathPair.html" target="_self">tvm::ObjectPathPair</a></td><td class="desc"></td></tr>
+<tr id="row_112_189_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1parser_1_1Source.html" target="_self">tvm::parser::Source</a></td><td class="desc"></td></tr>
+<tr id="row_112_190_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1parser_1_1SourceMap.html" target="_self">tvm::parser::SourceMap</a></td><td class="desc"></td></tr>
+<tr id="row_112_191_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_191_" class="arrow" onclick="toggleFolder('112_191_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfo.html" target="_self">tvm::PoolInfo</a></td><td class="desc">Base class for <a class="el" href="classtvm_1_1WorkspacePoolInfo.html">WorkspacePoolInfo</a> and <a class="el" href="classtvm_1_ [...]
+<tr id="row_112_191_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantPoolInfo.html" target="_self">tvm::ConstantPoolInfo</a></td><td class="desc"></td></tr>
+<tr id="row_112_191_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspacePoolInfo.html" target="_self">tvm::WorkspacePoolInfo</a></td><td class="desc"></td></tr>
+<tr id="row_112_192_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfoProperties.html" target="_self">tvm::PoolInfoProperties</a></td><td class="desc"></td></tr>
+<tr id="row_112_193_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Range.html" target="_self">tvm::Range</a></td><td class="desc"><a class="el" href="classtvm_1_1Range.html" title="Range constainer. ">Range</a> constainer </td></tr>
+<tr id="row_112_194_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Clause.html" target="_self">tvm::relay::Clause</a></td><td class="desc"></td></tr>
+<tr id="row_112_195_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstructorValue.html" target="_self">tvm::relay::ConstructorValue</a></td><td class="desc"></td></tr>
+<tr id="row_112_196_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_196_" class="arrow" onclick="toggleFolder('112_196_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPattern.html" target="_self">tvm::relay::DFPattern</a></td><td class="desc">Managed reference to dataflow patterns </td></tr>
+<tr id="row_112_196_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1AltPattern.html" target="_self">tvm::relay::AltPattern</a></td><td class="desc">A pattern which matches either of two patterns </td></tr>
+<tr id="row_112_196_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1AttrPattern.html" target="_self">tvm::relay::AttrPattern</a></td><td class="desc">A pattern which matches attributes in another pattern </td></tr>
+<tr id="row_112_196_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1CallPattern.html" target="_self">tvm::relay::CallPattern</a></td><td class="desc"></td></tr>
+<tr id="row_112_196_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstantPattern.html" target="_self">tvm::relay::ConstantPattern</a></td><td class="desc"></td></tr>
+<tr id="row_112_196_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DataTypePattern.html" target="_self">tvm::relay::DataTypePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
+<tr id="row_112_196_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DominatorPattern.html" target="_self">tvm::relay::DominatorPattern</a></td><td class="desc">A pattern which matches a variable length dominator path </td></tr>
+<tr id="row_112_196_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprPattern.html" target="_self">tvm::relay::ExprPattern</a></td><td class="desc">A pattern which matches a literal expression </td></tr>
+<tr id="row_112_196_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FunctionPattern.html" target="_self">tvm::relay::FunctionPattern</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" title="Relay Function container. ">FunctionNode</a> </td></tr>
+<tr id="row_112_196_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IfPattern.html" target="_self">tvm::relay::IfPattern</a></td><td class="desc"></td></tr>
+<tr id="row_112_196_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1LetPattern.html" target="_self">tvm::relay::LetPattern</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Let.html">Let</a> binding that binds a local var </td></tr>
+<tr id="row_112_196_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ShapePattern.html" target="_self">tvm::relay::ShapePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
+<tr id="row_112_196_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItemPattern.html" target="_self">tvm::relay::TupleGetItemPattern</a></td><td class="desc"></td></tr>
+<tr id="row_112_196_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TuplePattern.html" target="_self">tvm::relay::TuplePattern</a></td><td class="desc"></td></tr>
+<tr id="row_112_196_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TypePattern.html" target="_self">tvm::relay::TypePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
+<tr id="row_112_196_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1VarPattern.html" target="_self">tvm::relay::VarPattern</a></td><td class="desc"></td></tr>
+<tr id="row_112_196_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1WildcardPattern.html" target="_self">tvm::relay::WildcardPattern</a></td><td class="desc">A pattern which matches anything </td></tr>
+<tr id="row_112_197_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternCallback.html" target="_self">tvm::relay::DFPatternCallback</a></td><td class="desc">Managed reference to dataflow pattern callbacks </td></tr>
+<tr id="row_112_198_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Executor.html" target="_self">tvm::relay::Executor</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1relay_1_1ExecutorNode.html" title="Executor information. ">ExecutorNode</a> </td></tr>
+<tr id="row_112_199_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Id.html" target="_self">tvm::relay::Id</a></td><td class="desc"></td></tr>
+<tr id="row_112_200_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html" target="_self">tvm::relay::OpImplementation</a></td><td class="desc">Operator implementation class </td></tr>
+<tr id="row_112_201_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpSpecialization.html" target="_self">tvm::relay::OpSpecialization</a></td><td class="desc">Operator specialization class </td></tr>
+<tr id="row_112_202_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpStrategy.html" target="_self">tvm::relay::OpStrategy</a></td><td class="desc">Operator strategy class </td></tr>
+<tr id="row_112_203_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_203_" class="arrow" onclick="toggleFolder('112_203_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Pattern.html" target="_self">tvm::relay::Pattern</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in [...]
+<tr id="row_112_203_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternConstructor.html" target="_self">tvm::relay::PatternConstructor</a></td><td class="desc"></td></tr>
+<tr id="row_112_203_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternTuple.html" target="_self">tvm::relay::PatternTuple</a></td><td class="desc"></td></tr>
+<tr id="row_112_203_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVar.html" target="_self">tvm::relay::PatternVar</a></td><td class="desc"></td></tr>
+<tr id="row_112_203_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html" target="_self">tvm::relay::PatternWildcard</a></td><td class="desc"></td></tr>
+<tr id="row_112_204_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RecClosure.html" target="_self">tvm::relay::RecClosure</a></td><td class="desc"></td></tr>
+<tr id="row_112_205_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefValue.html" target="_self">tvm::relay::RefValue</a></td><td class="desc"></td></tr>
+<tr id="row_112_206_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Runtime.html" target="_self">tvm::relay::Runtime</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1relay_1_1RuntimeNode.html" title="Runtime information. ">RuntimeNode</a> </td></tr>
+<tr id="row_112_207_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1ADT.html" target="_self">tvm::runtime::ADT</a></td><td class="desc">Reference to algebraic data type objects </td></tr>
+<tr id="row_112_208_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Array.html" target="_self">tvm::runtime::Array&lt; T, typename &gt;</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Array.html" title="Array, container representing a contiguous sequence of ObjectRefs. ">Array</a>, container representing a contiguous se [...]
+<tr id="row_112_209_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_209_" class="arrow" onclick="toggleFolder('112_209_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Closure.html" target="_self">tvm::runtime::Closure</a></td><td class="desc">Reference to closure </td></tr>
+<tr id="row_112_209_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1InterpreterClosure.html" target="_self">tvm::relay::InterpreterClosure</a></td><td class="desc"></td></tr>
+<tr id="row_112_209_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1vm_1_1VMClosure.html" target="_self">tvm::runtime::vm::VMClosure</a></td><td class="desc">Reference to closure </td></tr>
+<tr id="row_112_210_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Map.html" target="_self">tvm::runtime::Map&lt; K, V, typename, typename &gt;</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Map.html" title="Map container of NodeRef-&gt;NodeRef in DSL graph. Map implements copy on write semantics, which means map is m [...]
+<tr id="row_112_211_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_211_" class="arrow" onclick="toggleFolder('112_211_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataBase.html" target="_self">tvm::runtime::metadata::MetadataBase</a></td><td class="desc">Reference class for the common <a class="el" href="classtvm_1_1runtime_1_1meta [...]
+<tr id="row_112_211_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ConstantInfoMetadata.html" target="_self">tvm::runtime::metadata::ConstantInfoMetadata</a></td><td class="desc"></td></tr>
+<tr id="row_112_211_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1Metadata.html" target="_self">tvm::runtime::metadata::Metadata</a></td><td class="desc"></td></tr>
+<tr id="row_112_211_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataArray.html" target="_self">tvm::runtime::metadata::MetadataArray</a></td><td class="desc">Reference class for <a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataArray.html" title="Reference class for MetadataArray. ">MetadataArray</a> </td></tr>
+<tr id="row_112_211_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1TensorInfo.html" target="_self">tvm::runtime::metadata::TensorInfo</a></td><td class="desc"></td></tr>
+<tr id="row_112_212_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Module.html" target="_self">tvm::runtime::Module</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Module.html" title="Module container of TVM. ">Module</a> container of TVM </td></tr>
+<tr id="row_112_213_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1NDArray.html" target="_self">tvm::runtime::NDArray</a></td><td class="desc">Managed <a class="el" href="classtvm_1_1runtime_1_1NDArray.html" title="Managed NDArray. The array is backed by reference counted blocks. ">NDArray</a>. The array is backed by reference counted blo [...]
+<tr id="row_112_214_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Optional.html" target="_self">tvm::runtime::Optional&lt; T &gt;</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Optional.html" title="Optional container that to represent to a Nullable variant of T. ">Optional</a> container that to represent to a Nullab [...]
+<tr id="row_112_215_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1PackedFunc.html" target="_self">tvm::runtime::PackedFunc</a></td><td class="desc">Packed function is a type-erased function. The arguments are passed by packed format </td></tr>
+<tr id="row_112_216_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1DeviceWrapper.html" target="_self">tvm::runtime::profiling::DeviceWrapper</a></td><td class="desc">Wrapper for <code>Device</code> </td></tr>
+<tr id="row_112_217_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1MetricCollector.html" target="_self">tvm::runtime::profiling::MetricCollector</a></td><td class="desc">Wrapper for <code><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1MetricCollectorNode.html" title="Interface for user defined profiling metric collec [...]
+<tr id="row_112_218_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1Report.html" target="_self">tvm::runtime::profiling::Report</a></td><td class="desc"></td></tr>
+<tr id="row_112_219_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1ShapeTuple.html" target="_self">tvm::runtime::ShapeTuple</a></td><td class="desc">Reference to shape tuple objects </td></tr>
+<tr id="row_112_220_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1String.html" target="_self">tvm::runtime::String</a></td><td class="desc">Reference to string objects </td></tr>
+<tr id="row_112_221_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Timer.html" target="_self">tvm::runtime::Timer</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Timer.html" title="Timer for a specific device. ">Timer</a> for a specific device </td></tr>
+<tr id="row_112_222_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Storage.html" target="_self">tvm::runtime::vm::Storage</a></td><td class="desc">Reference to storage </td></tr>
+<tr id="row_112_223_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilder.html" target="_self">tvm::script::ir_builder::IRBuilder</a></td><td class="desc">Managed reference to an <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderNode.html" title="A dialect-agnostic IRBuilder that constructs any IR of TVM.  [...]
+<tr id="row_112_224_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_224_" class="arrow" onclick="toggleFolder('112_224_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html" target="_self">tvm::script::ir_builder::IRBuilderFrame</a></td><td class="desc">Managed reference to an <a class="el" href="classtvm_1_1script_1_1ir__b [...]
+<tr id="row_112_224_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1ir_1_1IRModuleFrame.html" target="_self">tvm::script::ir_builder::ir::IRModuleFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1ir_1_1IRModuleFrameNode.html" title="A frame that represents the IRMod [...]
+<tr id="row_112_224_1_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_224_1_" class="arrow" onclick="toggleFolder('112_224_1_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1TIRFrame.html" target="_self">tvm::script::ir_builder::tir::TIRFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir [...]
+<tr id="row_112_224_1_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AllocateConstFrame.html" target="_self">tvm::script::ir_builder::tir::AllocateConstFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AllocateConstFrameNode.html" title="A frame repre [...]
+<tr id="row_112_224_1_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AllocateFrame.html" target="_self">tvm::script::ir_builder::tir::AllocateFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AllocateFrameNode.html" title="A frame represents the alloc [...]
+<tr id="row_112_224_1_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AssertFrame.html" target="_self">tvm::script::ir_builder::tir::AssertFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AssertFrameNode.html" title="A frame that represents the assert [...]
+<tr id="row_112_224_1_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AttrFrame.html" target="_self">tvm::script::ir_builder::tir::AttrFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AttrFrameNode.html" title="A frame that represents attribute node.  [...]
+<tr id="row_112_224_1_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1BlockFrame.html" target="_self">tvm::script::ir_builder::tir::BlockFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1BlockFrameNode.html" title="A frame that represents the block. "> [...]
+<tr id="row_112_224_1_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1BlockInitFrame.html" target="_self">tvm::script::ir_builder::tir::BlockInitFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1BlockInitFrameNode.html" title="A frame that represents t [...]
+<tr id="row_112_224_1_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1DeclBufferFrame.html" target="_self">tvm::script::ir_builder::tir::DeclBufferFrame</a></td><td class="desc"></td></tr>
+<tr id="row_112_224_1_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ElseFrame.html" target="_self">tvm::script::ir_builder::tir::ElseFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ElseFrameNode.html" title="A frame that represents else. ">ElseFram [...]
+<tr id="row_112_224_1_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ForFrame.html" target="_self">tvm::script::ir_builder::tir::ForFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ForFrameNode.html" title="A frame that represents the for loop. ">For [...]
+<tr id="row_112_224_1_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1IfFrame.html" target="_self">tvm::script::ir_builder::tir::IfFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1IfFrameNode.html" title="A frame that represents if statement. ">IfFram [...]
+<tr id="row_112_224_1_10_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1LaunchThreadFrame.html" target="_self">tvm::script::ir_builder::tir::LaunchThreadFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1LaunchThreadFrameNode.html" title="The LaunchThrea [...]
+<tr id="row_112_224_1_11_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1LetFrame.html" target="_self">tvm::script::ir_builder::tir::LetFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1LetFrameNode.html" title="A frame represents the let binding express [...]
+<tr id="row_112_224_1_12_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1PrimFuncFrame.html" target="_self">tvm::script::ir_builder::tir::PrimFuncFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1PrimFuncFrameNode.html" title="A frame that represents the [...]
+<tr id="row_112_224_1_13_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1RealizeFrame.html" target="_self">tvm::script::ir_builder::tir::RealizeFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1RealizeFrameNode.html" title="A frame that represents realiz [...]
+<tr id="row_112_224_1_14_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ThenFrame.html" target="_self">tvm::script::ir_builder::tir::ThenFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1ThenFrameNode.html" title="A frame that represents then. ">ThenFra [...]
+<tr id="row_112_224_1_15_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1WhileFrame.html" target="_self">tvm::script::ir_builder::tir::WhileFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1WhileFrameNode.html" title="A frame that represents while loop.  [...]
+<tr id="row_112_225_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_225_" class="arrow" onclick="toggleFolder('112_225_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" target="_self">tvm::script::printer::Doc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1DocNode.html" title="The base [...]
+<tr id="row_112_225_0_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_225_0_" class="arrow" onclick="toggleFolder('112_225_0_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDoc.html" target="_self">tvm::script::printer::ExprDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDocNode.htm [...]
+<tr id="row_112_225_0_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDoc.html" target="_self">tvm::script::printer::AttrAccessDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDocNode.html" title="Doc that represents attribute access on another expression. " [...]
+<tr id="row_112_225_0_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1CallDoc.html" target="_self">tvm::script::printer::CallDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1CallDocNode.html" title="Doc that represents function call. ">CallDocNode</a> </td></tr>
+<tr id="row_112_225_0_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDoc.html" target="_self">tvm::script::printer::DictDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1DictDocNode.html" title="Doc that represents dictionary literal. ">DictDocNode</a> </td></tr>
+<tr id="row_112_225_0_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IdDoc.html" target="_self">tvm::script::printer::IdDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IdDocNode.html" title="Doc that represents identifier. ">IdDocNode</a> </td></tr>
+<tr id="row_112_225_0_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDoc.html" target="_self">tvm::script::printer::IndexDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDocNode.html" title="Doc that represents index access on another expression. ">IndexDocNode</a> < [...]
+<tr id="row_112_225_0_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDoc.html" target="_self">tvm::script::printer::LambdaDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDocNode.html" title="Doc that represents anonymous function. ">LambdaDocNode</a> </td></tr>
+<tr id="row_112_225_0_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDoc.html" target="_self">tvm::script::printer::ListDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ListDocNode.html" title="Doc that represents list literal. ">ListDocNode</a> </td></tr>
+<tr id="row_112_225_0_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDoc.html" target="_self">tvm::script::printer::LiteralDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDocNode.html" title="Doc that represents literal value. ">LiteralDocNode</a> </td></tr>
+<tr id="row_112_225_0_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDoc.html" target="_self">tvm::script::printer::OperationDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDocNode.html" title="Doc that represents operation. ">OperationDocNode</a> </td></tr>
+<tr id="row_112_225_0_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1TupleDoc.html" target="_self">tvm::script::printer::TupleDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1TupleDocNode.html" title="Doc that represents tuple literal. ">TupleDocNode</a> </td></tr>
+<tr id="row_112_225_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1SliceDoc.html" target="_self">tvm::script::printer::SliceDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1SliceDocNode.html" title="Doc that represents slice in Index expression. ">SliceDocNode</a> </td></tr>
+<tr id="row_112_225_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtBlockDoc.html" target="_self">tvm::script::printer::StmtBlockDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1StmtBlockDocNode.html" title="The container doc that holds a list of StmtDoc. ">StmtBlockDocNode [...]
+<tr id="row_112_225_3_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_225_3_" class="arrow" onclick="toggleFolder('112_225_3_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDoc.html" target="_self">tvm::script::printer::StmtDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1StmtDocNode.htm [...]
+<tr id="row_112_225_3_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDoc.html" target="_self">tvm::script::printer::AssertDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDocNode.html" title="Doc that represents assert statement. ">AssertDocNode</a> </td></tr>
+<tr id="row_112_225_3_1_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDoc.html" target="_self">tvm::script::printer::AssignDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDocNode.html" title="Doc that represents assign statement. ">AssignDocNode</a> </td></tr>
+<tr id="row_112_225_3_2_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDoc.html" target="_self">tvm::script::printer::ClassDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDocNode.html" title="Doc that represents class definition. ">ClassDocNode</a> </td></tr>
+<tr id="row_112_225_3_3_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDoc.html" target="_self">tvm::script::printer::ExprStmtDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDocNode.html" title="Doc that represents an expression as statement. ">ExprStmtDocNode</ [...]
+<tr id="row_112_225_3_4_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ForDoc.html" target="_self">tvm::script::printer::ForDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ForDocNode.html" title="Doc that represents for statement. ">ForDocNode</a> </td></tr>
+<tr id="row_112_225_3_5_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDoc.html" target="_self">tvm::script::printer::FunctionDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDocNode.html" title="Doc that represents function definition. ">FunctionDocNode</a> </td></tr>
+<tr id="row_112_225_3_6_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IfDoc.html" target="_self">tvm::script::printer::IfDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IfDocNode.html" title="Doc that represent if-then-else statement. ">IfDocNode</a> </td></tr>
+<tr id="row_112_225_3_7_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ReturnDoc.html" target="_self">tvm::script::printer::ReturnDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ReturnDocNode.html" title="Doc that represents return statement. ">ReturnDocNode</a> </td></tr>
+<tr id="row_112_225_3_8_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDoc.html" target="_self">tvm::script::printer::ScopeDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDocNode.html" title="Doc that represents special scopes. ">ScopeDocNode</a> </td></tr>
+<tr id="row_112_225_3_9_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1WhileDoc.html" target="_self">tvm::script::printer::WhileDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1WhileDocNode.html" title="Doc that represents while statement. ">WhileDocNode</a> </td></tr>
+<tr id="row_112_226_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_226_" class="arrow" onclick="toggleFolder('112_226_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1Frame.html" target="_self">tvm::script::printer::Frame</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1FrameNode.html">FrameNode [...]
+<tr id="row_112_226_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrame.html" target="_self">tvm::script::printer::MetadataFrame</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrameNode.html" title="MetadataFrame contains information like contant parameter array [...]
+<tr id="row_112_226_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1VarDefFrame.html" target="_self">tvm::script::printer::VarDefFrame</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1VarDefFrameNode.html" title="VarDefFrame contains information about the free variables that needs  [...]
+<tr id="row_112_227_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifier.html" target="_self">tvm::script::printer::IRDocsifier</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifierNode.html" title="IRDocsifier is the top-level interface in the IR-&gt;Doc process. ">IRD [...]
+<tr id="row_112_228_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1RootNodeContainer.html" target="_self">tvm::script::printer::RootNodeContainer</a></td><td class="desc"></td></tr>
+<tr id="row_112_229_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1VarTable.html" target="_self">tvm::script::printer::VarTable</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1VarTableNode.html" title="Variable Table manages mapping from variable object to ExprDoc during the proces [...]
+<tr id="row_112_230_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SourceName.html" target="_self">tvm::SourceName</a></td><td class="desc">The source name of a file span </td></tr>
+<tr id="row_112_231_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Span.html" target="_self">tvm::Span</a></td><td class="desc"></td></tr>
+<tr id="row_112_232_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Target.html" target="_self">tvm::Target</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetNode.html" title="Compilation target. ">TargetNode</a> </td></tr>
+<tr id="row_112_233_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKind.html" target="_self">tvm::TargetKind</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetKindNode.html" title="Target kind, specifies the kind of the target. ">TargetKindNode</a> </td></tr>
+<tr id="row_112_234_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTag.html" target="_self">tvm::TargetTag</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetTagNode.html" title="A target tag. ">TargetTagNode</a> </td></tr>
+<tr id="row_112_235_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1IterVarAttr.html" target="_self">tvm::te::IterVarAttr</a></td><td class="desc">Additional scheduable attributes about IterVar </td></tr>
+<tr id="row_112_236_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_236_" class="arrow" onclick="toggleFolder('112_236_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1IterVarRelation.html" target="_self">tvm::te::IterVarRelation</a></td><td class="desc">The schedule relation between IterVars can be <a class="el" href="classtvm_1_1te_1_1Split.html" title="M [...]
+<tr id="row_112_236_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Fuse.html" target="_self">tvm::te::Fuse</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1FuseNode.html" title="Fuse two domains into one domain. ">FuseNode</a> </td></tr>
+<tr id="row_112_236_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Rebase.html" target="_self">tvm::te::Rebase</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1RebaseNode.html" title="Rebase the iteration to make min to be 0. This is useful to normalize the Schedule to make every leaf...">RebaseNode</a> </td></tr>
+<tr id="row_112_236_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Singleton.html" target="_self">tvm::te::Singleton</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1SingletonNode.html" title="Singleton iterator [0, 1) ">SingletonNode</a> </td></tr>
+<tr id="row_112_236_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Split.html" target="_self">tvm::te::Split</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1SplitNode.html" title="Split the parent domain into product of outer and iter. ">SplitNode</a> </td></tr>
+<tr id="row_112_236_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Transform.html" target="_self">tvm::te::Transform</a></td><td class="desc"></td></tr>
+<tr id="row_112_237_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_237_" class="arrow" onclick="toggleFolder('112_237_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Operation.html" target="_self">tvm::te::Operation</a></td><td class="desc"><a class="el" href="classtvm_1_1te_1_1Operation.html" title="Operation that produces tensors. ">Operation</a> that p [...]
+<tr id="row_112_237_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1ComputeOp.html" target="_self">tvm::te::ComputeOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1ComputeOpNode.html" title="A Compute op that compute a tensor on certain domain. ">ComputeOpNode</a> </td></tr>
+<tr id="row_112_237_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1ExternOp.html" target="_self">tvm::te::ExternOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1ExternOpNode.html" title="External computation that cannot be splitted. ">ExternOpNode</a> </td></tr>
+<tr id="row_112_237_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1HybridOp.html" target="_self">tvm::te::HybridOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1HybridOpNode.html" title="A computation operator that generated by hybrid script. ">HybridOpNode</a> </td></tr>
+<tr id="row_112_237_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1PlaceholderOp.html" target="_self">tvm::te::PlaceholderOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1PlaceholderOpNode.html" title="A placeholder op represents an input placeholder. ">PlaceholderOpNode</a> </td></tr>
+<tr id="row_112_237_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1ScanOp.html" target="_self">tvm::te::ScanOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1ScanOpNode.html" title="Symbolic scan. ">ScanOpNode</a> </td></tr>
+<tr id="row_112_237_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1TensorComputeOp.html" target="_self">tvm::te::TensorComputeOp</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1TensorComputeOpNode.html" title="A TenorCompute op that compute a tensor with an tensor intrinsic. ">TensorComputeOpNode</a> </td></tr>
+<tr id="row_112_238_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Schedule.html" target="_self">tvm::te::Schedule</a></td><td class="desc">Global schedule container For operations and all the operations they depend on. The schedule per <a class="el" href="classtvm_1_1te_1_1Operation.html" title="Operation that produces tensors. ">Operation</a [...]
+<tr id="row_112_239_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1SpecializedCondition.html" target="_self">tvm::te::SpecializedCondition</a></td><td class="desc">Specialized condition to enable op specialization </td></tr>
+<tr id="row_112_240_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Stage.html" target="_self">tvm::te::Stage</a></td><td class="desc"><a class="el" href="classtvm_1_1te_1_1Stage.html" title="Stage, contains scheduling for a stage of computation. ">Stage</a>, contains scheduling for a stage of computation </td></tr>
+<tr id="row_112_241_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1TensorIntrin.html" target="_self">tvm::te::TensorIntrin</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1TensorIntrinNode.html" title="Node to represent a Tensor intrinsic operator. ">TensorIntrinNode</a> </td></tr>
+<tr id="row_112_242_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1TensorIntrinCall.html" target="_self">tvm::te::TensorIntrinCall</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1te_1_1TensorIntrinCallNode.html">TensorIntrinCallNode</a> </td></tr>
+<tr id="row_112_243_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BijectiveLayout.html" target="_self">tvm::tir::BijectiveLayout</a></td><td class="desc">Bijective function mapping for data layout transformation. Given two <a class="el" href="classtvm_1_1tir_1_1Layout.html" title="Managed reference to LayoutNode. ">Layout</a>, <a class="el"  [...]
+<tr id="row_112_244_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BlockRV.html" target="_self">tvm::tir::BlockRV</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BlockRVNode.html" title="A random variable that evaluates to a TensorIR block. ">BlockRVNode</a> </td></tr>
+<tr id="row_112_245_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BlockScope.html" target="_self">tvm::tir::BlockScope</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BlockScopeNode.html" title="An object with 1-to-1 correspondence with each block reference in the sref tree. This data structure ...">Block [...]
+<tr id="row_112_246_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Buffer.html" target="_self">tvm::tir::Buffer</a></td><td class="desc"><a class="el" href="classtvm_1_1tir_1_1Buffer.html" title="Buffer is a symbolic n-darray structure. It is a composition of primitive symbolic types...">Buffer</a> is a symbolic n-darray structure. It is a co [...]
+<tr id="row_112_247_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferRegion.html" target="_self">tvm::tir::BufferRegion</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferRegionNode.html" title="Representing the region of multi-dimensional buffer access. ">BufferRegionNode</a> </td></tr>
+<tr id="row_112_248_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1CommReducer.html" target="_self">tvm::tir::CommReducer</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1CommReducerNode.html" title="A commutative reducer node to represent a commutative binary operator with identity element...">CommReducerN [...]
+<tr id="row_112_249_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_249_" class="arrow" onclick="toggleFolder('112_249_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1DataProducer.html" target="_self">tvm::tir::DataProducer</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1DataProducerNode.html" title="Base node for data [...]
+<tr id="row_112_249_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1te_1_1Tensor.html" target="_self">tvm::te::Tensor</a></td><td class="desc"><a class="el" href="classtvm_1_1te_1_1Tensor.html" title="Tensor structure representing a possible input, or intermediate computation result. ">Tensor</a> structure representing a possible input, or intermed [...]
+<tr id="row_112_250_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Dependency.html" target="_self">tvm::tir::Dependency</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1DependencyNode.html" title="A tuple (src, dst, kind) representing certain types of dependency. For example, (A, B, kRAW) means block B depe [...]
+<tr id="row_112_251_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IndexMap.html" target="_self">tvm::tir::IndexMap</a></td><td class="desc"></td></tr>
+<tr id="row_112_252_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Instruction.html" target="_self">tvm::tir::Instruction</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1InstructionNode.html" title="Schedule instructions each corresponds to a schedule primitive. ">InstructionNode</a> </td></tr>
+<tr id="row_112_253_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1InstructionKind.html" target="_self">tvm::tir::InstructionKind</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1InstructionKindNode.html" title="Kind of an instruction, e.g. Split, Reorder, etc. Besides the name, every kind of instruction ha [...]
+<tr id="row_112_254_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IterVar.html" target="_self">tvm::tir::IterVar</a></td><td class="desc">Iteration Variable, represents an iteration over an integer interval </td></tr>
+<tr id="row_112_255_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Layout.html" target="_self">tvm::tir::Layout</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LayoutNode.html" title="Layout is to describe how data is organized within an N-dimention tensor. It is composed of upper cas...">LayoutNode</a> </ [...]
+<tr id="row_112_256_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LoopRV.html" target="_self">tvm::tir::LoopRV</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LoopRVNode.html" title="A random variable that evaluates to a TensorIR for loop. ">LoopRVNode</a> </td></tr>
+<tr id="row_112_257_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1MatchBufferRegion.html" target="_self">tvm::tir::MatchBufferRegion</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1MatchBufferRegionNode.html" title="Match introduces a constraint that the source buffer region can be remapped to the data la [...]
+<tr id="row_112_258_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Schedule.html" target="_self">tvm::tir::Schedule</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ScheduleNode.html" title="The user-facing schedule class. ">ScheduleNode</a> </td></tr>
+<tr id="row_112_259_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ScheduleState.html" target="_self">tvm::tir::ScheduleState</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ScheduleStateNode.html" title="The state of scheduling, which exposes a Replace method as the primary interface for all the scheduli. [...]
+<tr id="row_112_260_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_260_" class="arrow" onclick="toggleFolder('112_260_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Stmt.html" target="_self">tvm::tir::Stmt</a></td><td class="desc">Container of all statements </td></tr>
+<tr id="row_112_260_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Allocate.html" target="_self">tvm::tir::Allocate</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AllocateNode.html" title="Allocate a buffer that can be used in body. ">AllocateNode</a> </td></tr>
+<tr id="row_112_260_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1AllocateConst.html" target="_self">tvm::tir::AllocateConst</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AllocateConstNode.html" title="Allocate a buffer that can be used in body. ">AllocateConstNode</a> </td></tr>
+<tr id="row_112_260_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1AssertStmt.html" target="_self">tvm::tir::AssertStmt</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AssertStmtNode.html" title="Assert condition, if an error occurs, return the error message. ">AssertStmtNode</a> </td></tr>
+<tr id="row_112_260_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1AttrStmt.html" target="_self">tvm::tir::AttrStmt</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1AttrStmtNode.html" title="Define certain auxiliary attribute for the body to be a symbolic value. This provide auxiliary inform...">AttrStmtN [...]
+<tr id="row_112_260_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Block.html" target="_self">tvm::tir::Block</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BlockNode.html" title="A block is a basic schedule unit in TIR. ">BlockNode</a> </td></tr>
+<tr id="row_112_260_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BlockRealize.html" target="_self">tvm::tir::BlockRealize</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BlockRealizeNode.html" title="A block realization node represents execution of the block at the binding values. ...">BlockRealizeNode [...]
+<tr id="row_112_260_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html" target="_self">tvm::tir::BufferRealize</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html" title="Annotate the region where the buffer need to be read and write in the body. We only need to allocate [...]
+<tr id="row_112_260_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1BufferStore.html" target="_self">tvm::tir::BufferStore</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html" title="Store value to the high dimension buffer. ">BufferStoreNode</a> </td></tr>
+<tr id="row_112_260_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1DeclBuffer.html" target="_self">tvm::tir::DeclBuffer</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1DeclBufferNode.html" title="Declare a buffer that can be used in the body. ">DeclBufferNode</a> </td></tr>
+<tr id="row_112_260_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Evaluate.html" target="_self">tvm::tir::Evaluate</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1EvaluateNode.html" title="Evaluates an expression. This is mostly used for putting a Call node into Stmt. ">EvaluateNode</a> </td></tr>
+<tr id="row_112_260_10_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1For.html" target="_self">tvm::tir::For</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ForNode.html" title="A for loop, with poissible type annotations. ">ForNode</a> </td></tr>
+<tr id="row_112_260_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1IfThenElse.html" target="_self">tvm::tir::IfThenElse</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1IfThenElseNode.html" title="IfThenElse statment. ">IfThenElseNode</a> </td></tr>
+<tr id="row_112_260_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1LetStmt.html" target="_self">tvm::tir::LetStmt</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html" title="Let binding, bind var to value, then run body. ">LetStmtNode</a> </td></tr>
+<tr id="row_112_260_13_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Prefetch.html" target="_self">tvm::tir::Prefetch</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html" title="A prefetch hint for a buffer. ">PrefetchNode</a> </td></tr>
+<tr id="row_112_260_14_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ProducerRealize.html" target="_self">tvm::tir::ProducerRealize</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ProducerRealizeNode.html" title="Annotate the bounds where the data produced by the producer need to be written and read in bo [...]
+<tr id="row_112_260_15_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1ProducerStore.html" target="_self">tvm::tir::ProducerStore</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1ProducerStoreNode.html" title="Store value into mult-dimensional array that will be read by the consumer of the producer. ">Produc [...]
+<tr id="row_112_260_16_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1SeqStmt.html" target="_self">tvm::tir::SeqStmt</a></td><td class="desc">Sequence statement </td></tr>
+<tr id="row_112_260_17_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Store.html" target="_self">tvm::tir::Store</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1StoreNode.html" title="Store value to the buffer. ">StoreNode</a> </td></tr>
+<tr id="row_112_260_18_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1While.html" target="_self">tvm::tir::While</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1WhileNode.html" title="A While loop. ">WhileNode</a> </td></tr>
+<tr id="row_112_261_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1StmtSRef.html" target="_self">tvm::tir::StmtSRef</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1StmtSRefNode.html" title="An object that refers to schedulable elements (block/for-loop) in TensorIR, aka &quot;sref&quot;. ">StmtSRefNode</a>  [...]
+<tr id="row_112_262_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1TensorIntrin.html" target="_self">tvm::tir::TensorIntrin</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1TensorIntrinNode.html" title="Tensor intrinsics for tensorization. ">TensorIntrinNode</a> </td></tr>
+<tr id="row_112_263_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1Trace.html" target="_self">tvm::tir::Trace</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1tir_1_1TraceNode.html" title="An execution trace of a scheduling program. ">TraceNode</a> </td></tr>
+<tr id="row_112_264_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1usmp_1_1AllocatedPoolInfo.html" target="_self">tvm::tir::usmp::AllocatedPoolInfo</a></td><td class="desc"></td></tr>
+<tr id="row_112_265_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1usmp_1_1BufferInfo.html" target="_self">tvm::tir::usmp::BufferInfo</a></td><td class="desc"></td></tr>
+<tr id="row_112_266_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1usmp_1_1BufferInfoAnalysis.html" target="_self">tvm::tir::usmp::BufferInfoAnalysis</a></td><td class="desc"></td></tr>
+<tr id="row_112_267_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1tir_1_1usmp_1_1PoolAllocation.html" target="_self">tvm::tir::usmp::PoolAllocation</a></td><td class="desc"></td></tr>
+<tr id="row_112_268_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_268_" class="arrow" onclick="toggleFolder('112_268_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1transform_1_1Pass.html" target="_self">tvm::transform::Pass</a></td><td class="desc"></td></tr>
+<tr id="row_112_268_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1transform_1_1Sequential.html" target="_self">tvm::transform::Sequential</a></td><td class="desc"></td></tr>
+<tr id="row_112_269_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1transform_1_1PassContext.html" target="_self">tvm::transform::PassContext</a></td><td class="desc"><a class="el" href="classtvm_1_1transform_1_1PassContext.html" title="PassContext that is used to configure the pass behavior. ">PassContext</a> that is used to configure the pass behav [...]
+<tr id="row_112_270_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1transform_1_1PassInfo.html" target="_self">tvm::transform::PassInfo</a></td><td class="desc">Managed reference class for <a class="el" href="classtvm_1_1transform_1_1PassInfoNode.html" title="Meta data that will be used to help optimization and analysis. ">PassInfoNode</a> </td></tr>
+<tr id="row_112_271_" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_112_271_" class="arrow" onclick="toggleFolder('112_271_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Type.html" target="_self">tvm::Type</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeNode.html" title="Type is the base type of all types. ">TypeNode</a> </td></tr>
+<tr id="row_112_271_0_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1BaseTensorType.html" target="_self">tvm::BaseTensorType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1BaseTensorTypeNode.html" title="Base of all Tensor types This container can hold TensorType or GenericTensorType. ...">BaseTensorTypeNode</a> </td></tr>
+<tr id="row_112_271_1_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FuncType.html" target="_self">tvm::FuncType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1FuncTypeNode.html" title="Function type. ">FuncTypeNode</a> </td></tr>
+<tr id="row_112_271_2_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalTypeVar.html" target="_self">tvm::GlobalTypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalTypeVarNode.html" title="A global type variable that is used for defining new types or type aliases. ">GlobalTypeVarNode</a> </td></tr>
+<tr id="row_112_271_3_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IncompleteType.html" target="_self">tvm::IncompleteType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1IncompleteTypeNode.html" title="Intermediate values that is used to indicate incomplete type during type inference. ">IncompleteTypeNode</a> </td></tr>
+<tr id="row_112_271_4_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PointerType.html" target="_self">tvm::PointerType</a></td><td class="desc"></td></tr>
+<tr id="row_112_271_5_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimType.html" target="_self">tvm::PrimType</a></td><td class="desc"></td></tr>
+<tr id="row_112_271_6_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayRefType.html" target="_self">tvm::RelayRefType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayRefTypeNode.html" title="Reference Type High-level Relay IR. ">RelayRefTypeNode</a> </td></tr>
+<tr id="row_112_271_7_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorType.html" target="_self">tvm::TensorType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TensorTypeNode.html" title="This is the most commonly used type in relay. TensorType have a fixed dimension, data type...">TensorTypeNode</a> </td></tr>
+<tr id="row_112_271_8_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleType.html" target="_self">tvm::TupleType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TupleTypeNode.html" title="The type of tuple values. ">TupleTypeNode</a> </td></tr>
+<tr id="row_112_271_9_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeCall.html" target="_self">tvm::TypeCall</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeCallNode.html" title="Type function application. ">TypeCallNode</a> </td></tr>
+<tr id="row_112_271_10_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_112_271_10_" class="arrow" onclick="toggleFolder('112_271_10_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeConstraint.html" target="_self">tvm::TypeConstraint</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeConstraintNode.html" title="Potential Constraints  [...]
+<tr id="row_112_271_10_0_" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeRelation.html" target="_self">tvm::TypeRelation</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeRelationNode.html" title="User defined type relation, it is an input-output relation on types. ">TypeRelationNode</a> </td></tr>
+<tr id="row_112_271_11_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeData.html" target="_self">tvm::TypeData</a></td><td class="desc">Stores all data for an Algebraic Data <a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> (ADT) </td></tr>
+<tr id="row_112_271_12_" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVar.html" target="_self">tvm::TypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeVarNode.html" title="Type parameter in functions. ">TypeVarNode</a> </td></tr>
+<tr id="row_112_272_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html" target="_self">tvm::TypedEnvFunc&lt; R(Args...)&gt;</a></td><td class="desc">A typed version of <a class="el" href="classtvm_1_1EnvFunc.html" title="Managed reference to EnvFuncNode. ">EnvFunc</a>. It is backed by a GlobalFuncNode internally [...]
+<tr id="row_112_273_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeReporter.html" target="_self">tvm::TypeReporter</a></td><td class="desc">Container class of <a class="el" href="classtvm_1_1TypeReporter.html" title="Container class of TypeReporter. ">TypeReporter</a> </td></tr>
+<tr id="row_112_274_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDevice.html" target="_self">tvm::VirtualDevice</a></td><td class="desc">Managed reference class to <code><a class="el" href="classtvm_1_1VirtualDeviceNode.html" title="Describes at compile time the constraints on where data is to be stored at runtime down to the (virtu...">Vir [...]
+<tr id="row_112_275_" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspaceMemoryPools.html" target="_self">tvm::WorkspaceMemoryPools</a></td><td class="desc"></td></tr>
 <tr id="row_113_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker.html" target="_self">tvm::runtime::ObjectTypeChecker&lt; T &gt;</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> traits for runtime type check during FFI conversion </td></tr>
 <tr id="row_114_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker_3_01Array_3_01T_01_4_01_4.html" target="_self">tvm::runtime::ObjectTypeChecker&lt; Array&lt; T &gt; &gt;</a></td><td class="desc"></td></tr>
 <tr id="row_115_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker_3_01Map_3_01K_00_01V_01_4_01_4.html" target="_self">tvm::runtime::ObjectTypeChecker&lt; Map&lt; K, V &gt; &gt;</a></td><td class="desc"></td></tr>
@@ -1413,8 +1412,8 @@ This inheritance list is sorted roughly, but not completely, alphabetically:</di
 <tr id="row_127_0_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html" target="_self">tvm::relay::PatternVisitor</a></td><td class="desc">A simple visitor wrapper around <a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html" title="A dynamical functor on ADT patterns that dispatches on its first argument.  [...]
 <tr id="row_128_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1Profiler.html" target="_self">tvm::runtime::profiling::Profiler</a></td><td class="desc"></td></tr>
 <tr id="row_129_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable.html" target="_self">tvm::ReflectionVTable</a></td><td class="desc">Virtual function table to support IR/AST node reflection </td></tr>
-<tr id="row_130_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Registry.html" target="_self">tvm::runtime::Registry</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Registry.html" title="Registry for global function. ">Registry</a> for global function </td></tr>
-<tr id="row_131_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" target="_self">tvm::ReflectionVTable::Registry</a></td><td class="desc"><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" title="Registry of a reflection table. ">Registry</a> of a reflection table </td></tr>
+<tr id="row_130_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" target="_self">tvm::ReflectionVTable::Registry</a></td><td class="desc"><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" title="Registry of a reflection table. ">Registry</a> of a reflection table </td></tr>
+<tr id="row_131_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1Registry.html" target="_self">tvm::runtime::Registry</a></td><td class="desc"><a class="el" href="classtvm_1_1runtime_1_1Registry.html" title="Registry for global function. ">Registry</a> for global function </td></tr>
 <tr id="row_132_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReprPrinter.html" target="_self">tvm::ReprPrinter</a></td><td class="desc">A printer class to print the AST/IR nodes </td></tr>
 <tr id="row_133_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1ReverseIterAdapter.html" target="_self">tvm::runtime::ReverseIterAdapter&lt; Converter, TIter &gt;</a></td><td class="desc">Iterator adapter that adapts TIter to return another type </td></tr>
 <tr id="row_134_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1arith_1_1RewriteSimplifier.html" target="_self">tvm::arith::RewriteSimplifier</a></td><td class="desc">Rewrite-rule based simplifier </td></tr>
diff --git a/docs/reference/api/doxygen/inherit_graph_101.svg b/docs/reference/api/doxygen/inherit_graph_101.svg
index c940c2fa85..af16fff0bb 100644
--- a/docs/reference/api/doxygen/inherit_graph_101.svg
+++ b/docs/reference/api/doxygen/inherit_graph_101.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 46)">
 <title>Graphical Class Hierarchy</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-46 420,-46 420,4 -4,4"/>
-<!-- Node1429 -->
+<!-- Node1428 -->
 <g id="node1" class="node">
-<title>Node1429</title>
+<title>Node1428</title>
 <polygon fill="#ffffff" stroke="#bfbfbf" points="0,-11.5 0,-30.5 85,-30.5 85,-11.5 0,-11.5"/>
 <text text-anchor="middle" x="42.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">bool_constant</text>
 </g>
@@ -25,9 +25,9 @@
 </a>
 </g>
 </g>
-<!-- Node1429&#45;&gt;Node0 -->
+<!-- Node1428&#45;&gt;Node0 -->
 <g id="edge1" class="edge">
-<title>Node1429&#45;&gt;Node0</title>
+<title>Node1428&#45;&gt;Node0</title>
 <path fill="none" stroke="#191970" d="M95.5463,-21C103.8034,-21 112.4186,-21 120.9137,-21"/>
 <polygon fill="#191970" stroke="#191970" points="95.3149,-17.5001 85.3149,-21 95.3148,-24.5001 95.3149,-17.5001"/>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_118.svg b/docs/reference/api/doxygen/inherit_graph_118.svg
index 8937db28f6..f06441aea2 100644
--- a/docs/reference/api/doxygen/inherit_graph_118.svg
+++ b/docs/reference/api/doxygen/inherit_graph_118.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 12638)">
 <title>Graphical Class Hierarchy</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-12638 1069,-12638 1069,4 -4,4"/>
-<!-- Node1423 -->
+<!-- Node1422 -->
 <g id="node1" class="node">
-<title>Node1423</title>
+<title>Node1422</title>
 <g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html" target="_top" xlink:title="The container base structure contains all the fields except for the Object header. ">
 <polygon fill="#ffffff" stroke="#000000" points="20,-11126 20,-11156 148,-11156 148,-11126 20,-11126"/>
 <text text-anchor="start" x="28" y="-11144" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::NDArray</text>
@@ -29,15 +29,15 @@
 </a>
 </g>
 </g>
-<!-- Node1423&#45;&gt;Node530 -->
+<!-- Node1422&#45;&gt;Node530 -->
 <g id="edge1" class="edge">
-<title>Node1423&#45;&gt;Node530</title>
+<title>Node1422&#45;&gt;Node530</title>
 <path fill="none" stroke="#191970" d="M158.2796,-11140.3278C184.9735,-11140.0862 214.8383,-11139.8159 240.6206,-11139.5826"/>
 <polygon fill="#191970" stroke="#191970" points="158.1871,-11136.8284 148.2192,-11140.4188 158.2505,-11143.8281 158.1871,-11136.8284"/>
 </g>
-<!-- Node1365 -->
+<!-- Node1364 -->
 <g id="node3" class="node">
-<title>Node1365</title>
+<title>Node1364</title>
 <g id="a_node3"><a xlink:href="classtvm_1_1runtime_1_1InplaceArrayBase.html" target="_top" xlink:title="Base template for classes with array like memory layout. ">
 <polygon fill="#ffffff" stroke="#000000" points="222.5,-2715 222.5,-2745 387.5,-2745 387.5,-2715 222.5,-2715"/>
 <text text-anchor="start" x="230.5" y="-2733" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::InplaceArray</text>
@@ -54,15 +54,15 @@
 </a>
 </g>
 </g>
-<!-- Node1365&#45;&gt;Node521 -->
+<!-- Node1364&#45;&gt;Node521 -->
 <g id="edge2" class="edge">
-<title>Node1365&#45;&gt;Node521</title>
+<title>Node1364&#45;&gt;Node521</title>
 <path fill="none" stroke="#191970" d="M396.6595,-2713.0982C400.0257,-2710.7165 403.165,-2708.0294 406,-2705 468.3107,-2638.416 378.0382,-2567.9995 442,-2503 446.9593,-2497.9602 452.831,-2494.0474 459.1955,-2491.0298"/>
 <polygon fill="#191970" stroke="#191970" points="394.6169,-2710.2431 387.8687,-2718.4108 398.2375,-2716.234 394.6169,-2710.2431"/>
 </g>
-<!-- Node1364 -->
+<!-- Node1363 -->
 <g id="node5" class="node">
-<title>Node1364</title>
+<title>Node1363</title>
 <g id="a_node5"><a xlink:href="classtvm_1_1runtime_1_1InplaceArrayBase.html" target="_top" xlink:title="tvm::runtime::InplaceArray\lBase\&lt; ADTObj, ObjectRef \&gt;">
 <polygon fill="#ffffff" stroke="#000000" points="7.5,-11077 7.5,-11107 160.5,-11107 160.5,-11077 7.5,-11077"/>
 <text text-anchor="start" x="15.5" y="-11095" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::InplaceArray</text>
@@ -79,15 +79,15 @@
 </a>
 </g>
 </g>
-<!-- Node1364&#45;&gt;Node514 -->
+<!-- Node1363&#45;&gt;Node514 -->
 <g id="edge3" class="edge">
-<title>Node1364&#45;&gt;Node514</title>
+<title>Node1363&#45;&gt;Node514</title>
 <path fill="none" stroke="#191970" d="M170.9456,-11092C195.1707,-11092 220.9797,-11092 243.4174,-11092"/>
 <polygon fill="#191970" stroke="#191970" points="170.6749,-11088.5001 160.6748,-11092 170.6748,-11095.5001 170.6749,-11088.5001"/>
 </g>
-<!-- Node1363 -->
+<!-- Node1362 -->
 <g id="node7" class="node">
-<title>Node1363</title>
+<title>Node1362</title>
 <g id="a_node7"><a xlink:href="classtvm_1_1runtime_1_1InplaceArrayBase.html" target="_top" xlink:title="tvm::runtime::InplaceArray\lBase\&lt; ArrayNode, ObjectRef \&gt;">
 <polygon fill="#ffffff" stroke="#000000" points="0,-11028 0,-11058 168,-11058 168,-11028 0,-11028"/>
 <text text-anchor="start" x="8" y="-11046" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::InplaceArray</text>
@@ -104,9 +104,9 @@
 </a>
 </g>
 </g>
-<!-- Node1363&#45;&gt;Node515 -->
+<!-- Node1362&#45;&gt;Node515 -->
 <g id="edge4" class="edge">
-<title>Node1363&#45;&gt;Node515</title>
+<title>Node1362&#45;&gt;Node515</title>
 <path fill="none" stroke="#191970" d="M178.2093,-11047.6892C197.5291,-11048.6508 217.5282,-11049.6462 235.774,-11050.5544"/>
 <polygon fill="#191970" stroke="#191970" points="178.3427,-11044.1916 168.1811,-11047.19 177.9947,-11051.1829 178.3427,-11044.1916"/>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_12.svg b/docs/reference/api/doxygen/inherit_graph_12.svg
index 66a00e898e..71c30a7a4a 100644
--- a/docs/reference/api/doxygen/inherit_graph_12.svg
+++ b/docs/reference/api/doxygen/inherit_graph_12.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 62)">
 <title>Graphical Class Hierarchy</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-62 186,-62 186,4 -4,4"/>
-<!-- Node1411 -->
+<!-- Node1410 -->
 <g id="node1" class="node">
-<title>Node1411</title>
+<title>Node1410</title>
 <polygon fill="#ffffff" stroke="#bfbfbf" points="0,-19.5 0,-38.5 40,-38.5 40,-19.5 0,-19.5"/>
 <text text-anchor="middle" x="20" y="-26.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">Error</text>
 </g>
@@ -24,24 +24,24 @@
 </a>
 </g>
 </g>
-<!-- Node1411&#45;&gt;Node0 -->
+<!-- Node1410&#45;&gt;Node0 -->
 <g id="edge1" class="edge">
-<title>Node1411&#45;&gt;Node0</title>
+<title>Node1410&#45;&gt;Node0</title>
 <path fill="none" stroke="#191970" d="M50.1726,-34.2594C61.6171,-36.2544 74.8623,-38.5631 87.1902,-40.712"/>
 <polygon fill="#191970" stroke="#191970" points="50.6991,-30.7985 40.2466,-32.5292 49.497,-37.6945 50.6991,-30.7985"/>
 </g>
-<!-- Node1413 -->
+<!-- Node1412 -->
 <g id="node3" class="node">
-<title>Node1413</title>
+<title>Node1412</title>
 <g id="a_node3"><a xlink:href="classtvm_1_1CompileError.html" target="_top" xlink:title="Custom Error class to be thrown during compilation. ">
 <polygon fill="#ffffff" stroke="#000000" points="76,-.5 76,-19.5 182,-19.5 182,-.5 76,-.5"/>
 <text text-anchor="middle" x="129" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::CompileError</text>
 </a>
 </g>
 </g>
-<!-- Node1411&#45;&gt;Node1413 -->
+<!-- Node1410&#45;&gt;Node1412 -->
 <g id="edge2" class="edge">
-<title>Node1411&#45;&gt;Node1413</title>
+<title>Node1410&#45;&gt;Node1412</title>
 <path fill="none" stroke="#191970" d="M50.1333,-23.7474C58.0955,-22.3595 66.9315,-20.8193 75.7249,-19.2865"/>
 <polygon fill="#191970" stroke="#191970" points="49.497,-20.3055 40.2466,-25.4708 50.6991,-27.2015 49.497,-20.3055"/>
 </g>
diff --git a/docs/reference/api/doxygen/inherit_graph_127.svg b/docs/reference/api/doxygen/inherit_graph_127.svg
index d9dae79eb3..043e07d1d7 100644
--- a/docs/reference/api/doxygen/inherit_graph_127.svg
+++ b/docs/reference/api/doxygen/inherit_graph_127.svg
@@ -4,17 +4,17 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: Graphical Class Hierarchy Pages: 1 -->
-<svg width="986pt" height="13395pt"
- viewBox="0.00 0.00 986.00 13394.50" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 13390.5)">
+<svg width="986pt" height="13335pt"
+ viewBox="0.00 0.00 986.00 13334.50" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 13330.5)">
 <title>Graphical Class Hierarchy</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-13390.5 982,-13390.5 982,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-13330.5 982,-13330.5 982,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
 <g id="a_node1"><a xlink:href="classtvm_1_1runtime_1_1ObjectRef.html" target="_top" xlink:title="Base class of all object reference. ">
-<polygon fill="#ffffff" stroke="#000000" points="0,-6013.5 0,-6032.5 134,-6032.5 134,-6013.5 0,-6013.5"/>
-<text text-anchor="middle" x="67" y="-6020.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectRef</text>
+<polygon fill="#ffffff" stroke="#000000" points="0,-5988.5 0,-6007.5 134,-6007.5 134,-5988.5 0,-5988.5"/>
+<text text-anchor="middle" x="67" y="-5995.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectRef</text>
 </a>
 </g>
 </g>
@@ -22,7632 +22,7615 @@
 <g id="node2" class="node">
 <title>Node1</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="tvm::runtime::Array\l\&lt; Range \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="237,-13356 237,-13386 350,-13386 350,-13356 237,-13356"/>
-<text text-anchor="start" x="245" y="-13374" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
-<text text-anchor="middle" x="293.5" y="-13363" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; Range &gt;</text>
+<polygon fill="#ffffff" stroke="#000000" points="237,-13296 237,-13326 350,-13326 350,-13296 237,-13296"/>
+<text text-anchor="start" x="245" y="-13314" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
+<text text-anchor="middle" x="293.5" y="-13303" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; Range &gt;</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node1 -->
 <g id="edge1" class="edge">
 <title>Node0&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M67.1055,-6042.893C69.3324,-6459.2126 106.7327,-13271.3444 170,-13347 186.0435,-13366.185 212.4414,-13373.0095 236.7218,-13374.6925"/>
-<polygon fill="#191970" stroke="#191970" points="70.6055,-6042.872 67.0522,-6032.8908 63.6056,-6042.9093 70.6055,-6042.872"/>
+<path fill="none" stroke="#191970" d="M67.108,-6018.1054C69.3693,-6435.469 107.048,-13211.7266 170,-13287 186.0442,-13306.1845 212.4421,-13313.0089 236.7224,-13314.692"/>
+<polygon fill="#191970" stroke="#191970" points="70.6066,-6017.8245 67.0526,-6007.8435 63.6067,-6017.8623 70.6066,-6017.8245"/>
 </g>
 <!-- Node2 -->
 <g id="node3" class="node">
 <title>Node2</title>
 <g id="a_node3"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="tvm::runtime::Array\l\&lt; Region \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="237,-13307 237,-13337 350,-13337 350,-13307 237,-13307"/>
-<text text-anchor="start" x="245" y="-13325" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
-<text text-anchor="middle" x="293.5" y="-13314" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; Region &gt;</text>
+<polygon fill="#ffffff" stroke="#000000" points="237,-13247 237,-13277 350,-13277 350,-13247 237,-13247"/>
+<text text-anchor="start" x="245" y="-13265" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
+<text text-anchor="middle" x="293.5" y="-13254" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; Region &gt;</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node2 -->
 <g id="edge2" class="edge">
 <title>Node0&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M67.1092,-6043.0634C69.3953,-6459.5538 107.4695,-13221.6382 170,-13297 186.0642,-13316.3605 212.6177,-13323.3718 236.9948,-13325.2219"/>
-<polygon fill="#191970" stroke="#191970" points="70.6079,-6042.8036 67.0532,-6032.8229 63.608,-6042.842 70.6079,-6042.8036"/>
+<path fill="none" stroke="#191970" d="M67.11,-6017.9668C69.4137,-6432.4534 107.7677,-13162.0029 170,-13237 186.0648,-13256.36 212.6184,-13263.3712 236.9954,-13265.2214"/>
+<polygon fill="#191970" stroke="#191970" points="70.609,-6017.7561 67.0536,-6007.7757 63.6091,-6017.7949 70.609,-6017.7561"/>
 </g>
 <!-- Node3 -->
 <g id="node4" class="node">
 <title>Node3</title>
 <g id="a_node4"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="tvm::runtime::Array\&lt; T \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="223,-13268.5 223,-13287.5 364,-13287.5 364,-13268.5 223,-13268.5"/>
-<text text-anchor="middle" x="293.5" y="-13275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array&lt; T &gt;</text>
+<polygon fill="#ffffff" stroke="#000000" points="223,-13208.5 223,-13227.5 364,-13227.5 364,-13208.5 223,-13208.5"/>
+<text text-anchor="middle" x="293.5" y="-13215.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array&lt; T &gt;</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node3 -->
 <g id="edge3" class="edge">
 <title>Node0&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M67.1051,-6042.9628C69.3076,-6457.3648 106.046,-13185.5192 170,-13259 183.1738,-13274.1362 202.9514,-13280.8147 222.8405,-13283.1714"/>
-<polygon fill="#191970" stroke="#191970" points="70.6042,-6042.755 67.0512,-6032.7737 63.6043,-6042.7921 70.6042,-6042.755"/>
+<path fill="none" stroke="#191970" d="M67.106,-6017.8662C69.3265,-6430.264 106.3527,-13125.8767 170,-13199 183.1743,-13214.1358 202.9521,-13220.8142 222.8412,-13223.1709"/>
+<polygon fill="#191970" stroke="#191970" points="70.6053,-6017.7075 67.0516,-6007.7264 63.6054,-6017.7451 70.6053,-6017.7075"/>
 </g>
 <!-- Node4 -->
 <g id="node5" class="node">
 <title>Node4</title>
 <g id="a_node5"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="tvm::runtime::Array\l\&lt; tvm::arith::IterSplitExpr \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="217,-13219 217,-13249 370,-13249 370,-13219 217,-13219"/>
-<text text-anchor="start" x="225" y="-13237" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
-<text text-anchor="middle" x="293.5" y="-13226" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::arith::IterSplitExpr &gt;</text>
+<polygon fill="#ffffff" stroke="#000000" points="217,-13159 217,-13189 370,-13189 370,-13159 217,-13159"/>
+<text text-anchor="start" x="225" y="-13177" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
+<text text-anchor="middle" x="293.5" y="-13166" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::arith::IterSplitExpr &gt;</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node4 -->
 <g id="edge4" class="edge">
 <title>Node0&#45;&gt;Node4</title>
-<path fill="none" stroke="#191970" d="M67.1104,-6042.8241C69.4231,-6454.3476 107.9213,-13135.7859 170,-13210 181.7017,-13223.9892 198.9089,-13231.406 216.785,-13235.0495"/>
-<polygon fill="#191970" stroke="#191970" points="70.6098,-6042.686 67.0538,-6032.7058 63.6099,-6042.7253 70.6098,-6042.686"/>
+<path fill="none" stroke="#191970" d="M67.1113,-6017.7276C69.4416,-6427.2471 108.221,-13076.1494 170,-13150 181.7022,-13163.9888 198.9095,-13171.4054 216.7857,-13175.0489"/>
+<polygon fill="#191970" stroke="#191970" points="70.611,-6017.6385 67.0542,-6007.6585 63.6111,-6017.6782 70.611,-6017.6385"/>
 </g>
 <!-- Node5 -->
 <g id="node6" class="node">
 <title>Node5</title>
 <g id="a_node6"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="tvm::runtime::Array\l\&lt; tvm::arith::IterSumExpr \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="217,-13170 217,-13200 370,-13200 370,-13170 217,-13170"/>
-<text text-anchor="start" x="225" y="-13188" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
-<text text-anchor="middle" x="293.5" y="-13177" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::arith::IterSumExpr &gt;</text>
+<polygon fill="#ffffff" stroke="#000000" points="217,-13110 217,-13140 370,-13140 370,-13110 217,-13110"/>
+<text text-anchor="start" x="225" y="-13128" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
+<text text-anchor="middle" x="293.5" y="-13117" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::arith::IterSumExpr &gt;</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node5 -->
 <g id="edge5" class="edge">
 <title>Node0&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M67.1116,-6042.6889C69.449,-6451.4069 108.3408,-13087.2949 170,-13161 181.7024,-13174.9887 198.9098,-13182.4052 216.786,-13186.0487"/>
-<polygon fill="#191970" stroke="#191970" points="70.6114,-6042.6195 67.0544,-6032.6396 63.6115,-6042.6594 70.6114,-6042.6195"/>
+<path fill="none" stroke="#191970" d="M67.1125,-6017.5924C69.4674,-6424.3064 108.6405,-13027.6584 170,-13101 181.7029,-13114.9882 198.9105,-13122.4047 216.7866,-13126.0482"/>
+<polygon fill="#191970" stroke="#191970" points="70.6125,-6017.572 67.0548,-6007.5924 63.6127,-6017.6124 70.6125,-6017.572"/>
 </g>
 <!-- Node6 -->
 <g id="node7" class="node">
 <title>Node6</title>
 <g id="a_node7"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="tvm::runtime::Array\l\&lt; tvm::AttrFieldInfo \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="231.5,-13121 231.5,-13151 355.5,-13151 355.5,-13121 231.5,-13121"/>
-<text text-anchor="start" x="239.5" y="-13139" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
-<text text-anchor="middle" x="293.5" y="-13128" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::AttrFieldInfo &gt;</text>
+<polygon fill="#ffffff" stroke="#000000" points="231.5,-13061 231.5,-13091 355.5,-13091 355.5,-13061 231.5,-13061"/>
+<text text-anchor="start" x="239.5" y="-13079" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
+<text text-anchor="middle" x="293.5" y="-13068" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::AttrFieldInfo &gt;</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node6 -->
 <g id="edge6" class="edge">
 <title>Node0&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M67.1154,-6042.8516C69.5124,-6451.6395 109.0695,-13037.5943 170,-13111 184.8103,-13128.8426 208.5296,-13136.1955 231.241,-13138.6864"/>
-<polygon fill="#191970" stroke="#191970" points="70.6138,-6042.5511 67.0554,-6032.5718 63.614,-6042.5921 70.6138,-6042.5511"/>
+<path fill="none" stroke="#191970" d="M67.1163,-6017.7536C69.531,-6424.523 109.3676,-12977.9589 170,-13051 184.8109,-13068.8421 208.5303,-13076.1949 231.2416,-13078.6858"/>
+<polygon fill="#191970" stroke="#191970" points="70.615,-6017.5036 67.0558,-6007.5245 63.6151,-6017.5451 70.615,-6017.5036"/>
 </g>
 <!-- Node7 -->
 <g id="node8" class="node">
 <title>Node7</title>
 <g id="a_node8"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="tvm::runtime::Array\l\&lt; tvm::auto_scheduler\l::Iterator \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="229.5,-13060.5 229.5,-13101.5 357.5,-13101.5 357.5,-13060.5 229.5,-13060.5"/>
-<text text-anchor="start" x="237.5" y="-13089.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
-<text text-anchor="start" x="237.5" y="-13078.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::auto_scheduler</text>
-<text text-anchor="middle" x="293.5" y="-13067.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::Iterator &gt;</text>
+<polygon fill="#ffffff" stroke="#000000" points="229.5,-13000.5 229.5,-13041.5 357.5,-13041.5 357.5,-13000.5 229.5,-13000.5"/>
+<text text-anchor="start" x="237.5" y="-13029.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
+<text text-anchor="start" x="237.5" y="-13018.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::auto_scheduler</text>
+<text text-anchor="middle" x="293.5" y="-13007.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::Iterator &gt;</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node7 -->
 <g id="edge7" class="edge">
 <title>Node0&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M67.1229,-6042.9885C69.6529,-6451.2376 111.0102,-12977.0864 170,-13051 184.2452,-13068.8491 207.1587,-13077.1692 229.4028,-13080.7479"/>
-<polygon fill="#191970" stroke="#191970" points="70.6225,-6042.9003 67.0607,-6032.9221 63.6227,-6042.9436 70.6225,-6042.9003"/>
+<path fill="none" stroke="#191970" d="M67.1238,-6017.8889C69.6711,-6424.105 111.3012,-12917.4566 170,-12991 184.2459,-13008.8486 207.1594,-13017.1686 229.4035,-13020.7473"/>
+<polygon fill="#191970" stroke="#191970" points="70.6237,-6017.8506 67.0612,-6007.8727 63.6238,-6017.8944 70.6237,-6017.8506"/>
 </g>
 <!-- Node8 -->
 <g id="node9" class="node">
 <title>Node8</title>
 <g id="a_node9"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="tvm::runtime::Array\l\&lt; tvm::auto_scheduler\l::Stage \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="229.5,-13000.5 229.5,-13041.5 357.5,-13041.5 357.5,-13000.5 229.5,-13000.5"/>
-<text text-anchor="start" x="237.5" y="-13029.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
-<text text-anchor="start" x="237.5" y="-13018.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::auto_scheduler</text>
-<text text-anchor="middle" x="293.5" y="-13007.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::Stage &gt;</text>
+<polygon fill="#ffffff" stroke="#000000" points="229.5,-12940.5 229.5,-12981.5 357.5,-12981.5 357.5,-12940.5 229.5,-12940.5"/>
+<text text-anchor="start" x="237.5" y="-12969.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
+<text text-anchor="start" x="237.5" y="-12958.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::auto_scheduler</text>
+<text text-anchor="middle" x="293.5" y="-12947.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::Stage &gt;</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node8 -->
 <g id="edge8" class="edge">
 <title>Node0&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M67.1263,-6043.1182C69.7047,-6450.771 111.5235,-12917.739 170,-12991 184.2463,-13008.8482 207.16,-13017.1682 229.404,-13020.7469"/>
-<polygon fill="#191970" stroke="#191970" points="70.6245,-6042.8151 67.0615,-6032.8374 63.6247,-6042.8593 70.6245,-6042.8151"/>
+<path fill="none" stroke="#191970" d="M67.1271,-6018.0172C69.7231,-6423.6225 111.8145,-12858.1091 170,-12931 184.247,-12948.8477 207.1608,-12957.1675 229.4047,-12960.7464"/>
+<polygon fill="#191970" stroke="#191970" points="70.6257,-6017.7654 67.0619,-6007.788 63.6259,-6017.8102 70.6257,-6017.7654"/>
 </g>
 <!-- Node9 -->
 <g id="node10" class="node">
 <title>Node9</title>
 <g id="a_node10"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="tvm::runtime::Array\l\&lt; tvm::auto_scheduler\l::Step \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="229.5,-12940.5 229.5,-12981.5 357.5,-12981.5 357.5,-12940.5 229.5,-12940.5"/>
-<text text-anchor="start" x="237.5" y="-12969.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
-<text text-anchor="start" x="237.5" y="-12958.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::auto_scheduler</text>
-<text text-anchor="middle" x="293.5" y="-12947.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::Step &gt;</text>
+<polygon fill="#ffffff" stroke="#000000" points="229.5,-12880.5 229.5,-12921.5 357.5,-12921.5 357.5,-12880.5 229.5,-12880.5"/>
+<text text-anchor="start" x="237.5" y="-12909.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
+<text text-anchor="start" x="237.5" y="-12898.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::auto_scheduler</text>
+<text text-anchor="middle" x="293.5" y="-12887.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::Step &gt;</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node9 -->
 <g id="edge9" class="edge">
 <title>Node0&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M67.1278,-6042.945C69.7362,-6447.0878 112.0223,-12858.3735 170,-12931 184.2475,-12948.8473 207.1614,-12957.1671 229.4052,-12960.746"/>
-<polygon fill="#191970" stroke="#191970" points="70.6266,-6042.7299 67.0622,-6032.7527 63.6267,-6042.775 70.6266,-6042.7299"/>
+<path fill="none" stroke="#191970" d="M67.1286,-6017.844C69.7545,-6419.9393 112.3132,-12798.7435 170,-12871 184.2482,-12888.8468 207.1622,-12897.1664 229.4059,-12900.7454"/>
+<polygon fill="#191970" stroke="#191970" points="70.6277,-6017.6803 67.0626,-6007.7033 63.6279,-6017.7259 70.6277,-6017.6803"/>
 </g>
 <!-- Node10 -->
 <g id="node11" class="node">
 <title>Node10</title>
 <g id="a_node11"><a xlink:href="classtvm_1_1runtime_1_1Array.html" target="_top" xlink:title="tvm::runtime::Array\l\&lt; tvm::BaseFunc \&gt;">
-<polygon fill="#ffffff" stroke="#000000" points="237,-12891 237,-12921 350,-12921 350,-12891 237,-12891"/>
-<text text-anchor="start" x="245" y="-12909" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Array</text>
-<text text-anchor="middle" x="293.5" y="-12898" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">&lt; tvm::BaseFunc &gt;</text>
+<polygon fill="#ffffff" stroke="#000000" points="237,-12831 237,-12861 350,-12861 350,-12831 237,-12831"/>
... 14050 lines suppressed ...