You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2022/07/18 19:16:40 UTC

[tvm-site] branch asf-site updated: deploying docs (apache/tvm@9c7aaace4355c67403be563de3059d34fb8e29f5)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new bffeae632 deploying docs (apache/tvm@9c7aaace4355c67403be563de3059d34fb8e29f5)
bffeae632 is described below

commit bffeae6323e9ecfb2a5635916dc701c9fdf05f6f
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Mon Jul 18 19:16:33 2022 +0000

    deploying docs (apache/tvm@9c7aaace4355c67403be563de3059d34fb8e29f5)
---
 .../how_to/compile_models/from_darknet.rst.txt     |    2 +-
 .../how_to/compile_models/from_mxnet.rst.txt       |    2 +-
 .../how_to/compile_models/from_oneflow.rst.txt     |    2 +-
 .../how_to/compile_models/from_pytorch.rst.txt     |    2 +-
 .../how_to/compile_models/from_tensorflow.rst.txt  |    2 +-
 .../compile_models/sg_execution_times.rst.txt      |   22 +-
 .../deploy_models/deploy_model_on_android.rst.txt  |    2 +-
 .../deploy_object_detection_pytorch.rst.txt        |    4 +-
 .../deploy_models/deploy_prequantized.rst.txt      |    6 +-
 .../deploy_prequantized_tflite.rst.txt             |    4 +-
 .../how_to/deploy_models/deploy_quantized.rst.txt  |    2 +-
 .../deploy_models/deploy_ssd_gluoncv.rst.txt       |    4 +-
 .../deploy_models/sg_execution_times.rst.txt       |   16 +-
 .../extend_tvm/bring_your_own_datatypes.rst.txt    |    4 +-
 .../how_to/extend_tvm/sg_execution_times.rst.txt   |   10 +-
 .../how_to/extend_tvm/use_pass_instrument.rst.txt  |   16 +-
 .../optimize_operators/opt_conv_cuda.rst.txt       |    2 +-
 .../optimize_operators/opt_conv_tensorcore.rst.txt |    2 +-
 .../how_to/optimize_operators/opt_gemm.rst.txt     |   16 +-
 .../optimize_operators/sg_execution_times.rst.txt  |    8 +-
 .../sg_execution_times.rst.txt                     |   14 +-
 .../tune_conv2d_layer_cuda.rst.txt                 | 1057 +++-----------------
 .../tune_network_cuda.rst.txt                      |    2 +-
 .../tune_network_x86.rst.txt                       |    4 +-
 .../tune_sparse_x86.rst.txt                        |  132 ++-
 .../tune_with_autotvm/sg_execution_times.rst.txt   |    4 +-
 .../tune_with_autotvm/tune_conv2d_cuda.rst.txt     |   34 +-
 .../work_with_microtvm/micro_autotune.rst.txt      |   16 +-
 .../how_to/work_with_microtvm/micro_train.rst.txt  |   16 +-
 .../work_with_microtvm/sg_execution_times.rst.txt  |    8 +-
 .../work_with_relay/sg_execution_times.rst.txt     |    6 +-
 .../how_to/work_with_schedules/intrin_math.rst.txt |    2 +-
 .../work_with_schedules/sg_execution_times.rst.txt |   14 +-
 .../how_to/work_with_schedules/tensorize.rst.txt   |    2 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    4 +-
 .../frontend/deploy_classification.rst.txt         |    2 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    6 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../topic/vta/tutorials/sg_execution_times.rst.txt |    6 +-
 .../tutorial/auto_scheduler_matmul_x86.rst.txt     |    4 +-
 docs/_sources/tutorial/autotvm_matmul_x86.rst.txt  |   20 +-
 docs/_sources/tutorial/autotvm_relay_x86.rst.txt   |   54 +-
 .../tutorial/cross_compilation_and_rpc.rst.txt     |    2 +-
 docs/_sources/tutorial/intro_topi.rst.txt          |    2 +-
 docs/_sources/tutorial/sg_execution_times.rst.txt  |   20 +-
 .../tutorial/tensor_expr_get_started.rst.txt       |   49 +-
 docs/commit_hash                                   |    2 +-
 docs/how_to/compile_models/from_darknet.html       |    2 +-
 docs/how_to/compile_models/from_mxnet.html         |    2 +-
 docs/how_to/compile_models/from_oneflow.html       |   13 +-
 docs/how_to/compile_models/from_pytorch.html       |    7 +-
 docs/how_to/compile_models/from_tensorflow.html    |    2 +-
 docs/how_to/compile_models/sg_execution_times.html |   22 +-
 .../deploy_models/deploy_model_on_android.html     |    2 +-
 .../deploy_object_detection_pytorch.html           |   18 +-
 docs/how_to/deploy_models/deploy_prequantized.html |    6 +-
 .../deploy_models/deploy_prequantized_tflite.html  |    4 +-
 docs/how_to/deploy_models/deploy_quantized.html    |    2 +-
 docs/how_to/deploy_models/deploy_ssd_gluoncv.html  |   36 +-
 docs/how_to/deploy_models/sg_execution_times.html  |   16 +-
 .../extend_tvm/bring_your_own_datatypes.html       |    4 +-
 docs/how_to/extend_tvm/sg_execution_times.html     |   10 +-
 docs/how_to/extend_tvm/use_pass_instrument.html    |   16 +-
 docs/how_to/optimize_operators/opt_conv_cuda.html  |    2 +-
 .../optimize_operators/opt_conv_tensorcore.html    |    2 +-
 docs/how_to/optimize_operators/opt_gemm.html       |   16 +-
 .../optimize_operators/sg_execution_times.html     |    8 +-
 .../sg_execution_times.html                        |   14 +-
 .../tune_conv2d_layer_cuda.html                    | 1057 +++-----------------
 .../tune_with_autoscheduler/tune_network_cuda.html |    2 +-
 .../tune_with_autoscheduler/tune_network_x86.html  |    4 +-
 .../tune_with_autoscheduler/tune_sparse_x86.html   |  132 ++-
 .../tune_with_autotvm/sg_execution_times.html      |    4 +-
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.html |   34 +-
 docs/how_to/work_with_microtvm/micro_autotune.html |   16 +-
 docs/how_to/work_with_microtvm/micro_train.html    |   16 +-
 .../work_with_microtvm/sg_execution_times.html     |    8 +-
 .../how_to/work_with_relay/sg_execution_times.html |    6 +-
 docs/how_to/work_with_schedules/intrin_math.html   |    2 +-
 .../work_with_schedules/sg_execution_times.html    |   14 +-
 docs/how_to/work_with_schedules/tensorize.html     |    2 +-
 .../api/doxygen/affine__type_8h_source.html        |    2 +-
 docs/reference/api/doxygen/analyzer_8h_source.html |    2 +-
 docs/reference/api/doxygen/buffer_8h_source.html   |    2 +-
 .../api/doxygen/constant__utils_8h_source.html     |    4 +-
 .../api/doxygen/cuda_2dense_8h_source.html         |    2 +-
 .../api/doxygen/cuda_2injective_8h_source.html     |    6 +-
 .../api/doxygen/cuda_2pooling_8h_source.html       |    6 +-
 .../api/doxygen/cuda_2reduction_8h_source.html     |    6 +-
 .../api/doxygen/cuda_2softmax_8h_source.html       |    2 +-
 .../api/doxygen/dataflow__matcher_8h_source.html   |    2 +-
 .../api/doxygen/dataflow__pattern_8h_source.html   |    6 +-
 .../api/doxygen/detail_2broadcast_8h_source.html   |    2 +-
 .../api/doxygen/detail_2extern_8h_source.html      |    2 +-
 docs/reference/api/doxygen/dilate_8h_source.html   |    2 +-
 docs/reference/api/doxygen/doc_8h_source.html      |    4 +-
 docs/reference/api/doxygen/elemwise_8h_source.html |    2 +-
 docs/reference/api/doxygen/error_8h_source.html    |    2 +-
 docs/reference/api/doxygen/executor_8h_source.html |    2 +-
 docs/reference/api/doxygen/int__set_8h_source.html |    2 +-
 .../api/doxygen/int__solver_8h_source.html         |    2 +-
 .../api/doxygen/interpreter_8h_source.html         |    2 +-
 docs/reference/api/doxygen/ir_2adt_8h_source.html  |    6 +-
 .../reference/api/doxygen/ir_2attrs_8h_source.html |    6 +-
 docs/reference/api/doxygen/ir_2expr_8h.html        |   60 ++
 docs/reference/api/doxygen/ir_2expr_8h_source.html |  124 ++-
 .../api/doxygen/ir_2function_8h_source.html        |    4 +-
 .../api/doxygen/ir_2module_8h_source.html          |    6 +-
 docs/reference/api/doxygen/ir_2op_8h_source.html   |    4 +-
 .../doxygen/local__response__norm_8h_source.html   |    4 +-
 .../api/doxygen/memory__pools_8h_source.html       |    4 +-
 .../api/doxygen/namespacemembers_func_o.html       |   44 +-
 .../api/doxygen/namespacemembers_func_p.html       |    6 +-
 docs/reference/api/doxygen/namespacemembers_o.html |   34 +-
 docs/reference/api/doxygen/namespacemembers_p.html |    6 +-
 docs/reference/api/doxygen/namespacetvm.html       |  596 +++++------
 docs/reference/api/doxygen/nn_2bnn_8h_source.html  |    6 +-
 .../reference/api/doxygen/nn_2dense_8h_source.html |    2 +-
 .../api/doxygen/nn_2pooling_8h_source.html         |    6 +-
 .../api/doxygen/nn_2softmax_8h_source.html         |    6 +-
 .../api/doxygen/op__strategy_8h_source.html        |    2 +-
 .../reference/api/doxygen/operation_8h_source.html |    2 +-
 .../reference/api/doxygen/reduction_8h_source.html |    6 +-
 .../api/doxygen/relay_2adt_8h_source.html          |    4 +-
 .../doxygen/relay_2attrs_2transform_8h_source.html |    6 +-
 .../api/doxygen/relay_2expr_8h_source.html         |   14 +-
 .../doxygen/relay_2expr__functor_8h_source.html    |    4 +-
 .../api/doxygen/relay_2feature_8h_source.html      |    4 +-
 .../api/doxygen/relay_2function_8h_source.html     |    6 +-
 .../doxygen/relay_2op__attr__types_8h_source.html  |    2 +-
 .../api/doxygen/relay_2transform_8h_source.html    |    2 +-
 docs/reference/api/doxygen/search/all_10.js        |   28 +-
 docs/reference/api/doxygen/search/all_11.js        |    2 +-
 docs/reference/api/doxygen/search/all_14.js        |    2 +-
 docs/reference/api/doxygen/search/functions_10.js  |    2 +-
 docs/reference/api/doxygen/search/functions_f.js   |   28 +-
 .../api/doxygen/strided__slice_8h_source.html      |    4 +-
 .../api/doxygen/tir_2analysis_8h_source.html       |    2 +-
 .../reference/api/doxygen/tir_2expr_8h_source.html |    6 +-
 .../api/doxygen/tir_2expr__functor_8h_source.html  |    4 +-
 .../api/doxygen/tir_2function_8h_source.html       |    2 +-
 docs/reference/api/doxygen/tir_2op_8h.html         |   68 +-
 docs/reference/api/doxygen/tir_2op_8h_source.html  |  142 ++-
 .../doxygen/tir_2op__attr__types_8h_source.html    |    2 +-
 .../doxygen/tir_2usmp_2transform_8h_source.html    |    2 +-
 .../api/doxygen/tir_2usmp_2utils_8h_source.html    |    2 +-
 docs/reference/api/doxygen/topi_2nn_8h_source.html |   10 +-
 .../api/doxygen/topi_2transform_8h_source.html     |   14 +-
 .../api/doxygen/topi_2utils_8h_source.html         |    2 +-
 .../api/doxygen/transform__step_8h_source.html     |    2 +-
 docs/reference/api/doxygen/var_8h_source.html      |    2 +-
 .../api/doxygen/virtual__device_8h_source.html     |    2 +-
 docs/reference/api/doxygen/vision_8h_source.html   |    2 +-
 docs/reference/api/python/auto_scheduler.html      |    4 +-
 .../api/typedoc/classes/bytestreamreader.html      |   12 +-
 .../api/typedoc/classes/cachedcallstack.html       |   34 +-
 docs/reference/api/typedoc/classes/dldatatype.html |   12 +-
 docs/reference/api/typedoc/classes/dldevice.html   |   10 +-
 .../reference/api/typedoc/classes/environment.html |   12 +-
 docs/reference/api/typedoc/classes/ffilibrary.html |   20 +-
 .../api/typedoc/classes/graphexecutor.html         |   16 +-
 docs/reference/api/typedoc/classes/instance.html   |   40 +-
 docs/reference/api/typedoc/classes/memory.html     |   34 +-
 docs/reference/api/typedoc/classes/module.html     |   10 +-
 docs/reference/api/typedoc/classes/ndarray.html    |   22 +-
 .../api/typedoc/classes/packedfunccell.html        |    6 +-
 docs/reference/api/typedoc/classes/rpcserver.html  |   14 +-
 docs/reference/api/typedoc/classes/scalar.html     |    6 +-
 .../api/typedoc/classes/webgpucontext.html         |   12 +-
 docs/reference/api/typedoc/enums/argtypecode.html  |   30 +-
 .../api/typedoc/enums/aynccallbackcode.html        |    4 +-
 .../api/typedoc/enums/dldatatypecode.html          |    8 +-
 .../api/typedoc/enums/rpcserverstate.html          |   12 +-
 docs/reference/api/typedoc/enums/sizeof.html       |   18 +-
 docs/reference/api/typedoc/index.html              |  112 +--
 .../api/typedoc/interfaces/disposable.html         |    2 +-
 .../api/typedoc/interfaces/functioninfo.html       |    6 +-
 .../api/typedoc/interfaces/libraryprovider.html    |    4 +-
 docs/searchindex.js                                |    2 +-
 .../vta/tutorials/autotvm/sg_execution_times.html  |    4 +-
 .../tutorials/frontend/deploy_classification.html  |    2 +-
 .../vta/tutorials/frontend/deploy_detection.html   |    2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    6 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    6 +-
 docs/topic/vta/tutorials/sg_execution_times.html   |    6 +-
 docs/tutorial/auto_scheduler_matmul_x86.html       |    4 +-
 docs/tutorial/autotvm_matmul_x86.html              |   20 +-
 docs/tutorial/autotvm_relay_x86.html               |  258 ++---
 docs/tutorial/cross_compilation_and_rpc.html       |    2 +-
 docs/tutorial/intro_topi.html                      |    2 +-
 docs/tutorial/sg_execution_times.html              |   24 +-
 docs/tutorial/tensor_expr_get_started.html         |   45 +-
 193 files changed, 1928 insertions(+), 3402 deletions(-)

diff --git a/docs/_sources/how_to/compile_models/from_darknet.rst.txt b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
index 18221fd5e..f04cd9c1f 100644
--- a/docs/_sources/how_to/compile_models/from_darknet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
@@ -317,7 +317,7 @@ The process is no different from other examples.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  0.890 seconds)
+   **Total running time of the script:** ( 1 minutes  4.791 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_darknet.py:
diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index c6438aa2b..2d9c29274 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -115,7 +115,7 @@ In this section, we download a pretrained imagenet model and classify an image.
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip3f57ba66-5bd1-48cd-a714-ceb3b1a31e15 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipf0c21fcb-c5be-400a-ada3-38098fbccd38 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
     x (1, 3, 224, 224)
 
 
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index 87508d8e5..1890cf8ad 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -113,7 +113,7 @@ Load a pretrained OneFlow model and save model
  .. code-block:: none
 
     Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     19%|#9        | 7.99M/41.5M [00:00<00:00, 70.5MB/s]
     39%|###8      | 16.0M/41.5M [00:00<00:00, 61.3MB/s]
     60%|######    | 25.0M/41.5M [00:00<00:00, 73.8MB/s]
     78%|#######7  | 32.3M/41.5M [00:00<00:00, 72.5MB/s]
     96%|#########6| 40.0M/41.5M [00:00<00:00, 73.9MB/s]
    100%|##########| 41.5M/41.5M [00:00<00:00, 73.9MB/s]
+
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     19%|#9        | 7.99M/41.5M [00:00<00:00, 76.3MB/s]
     37%|###6      | 15.3M/41.5M [00:00<00:00, 73.1MB/s]
     54%|#####3    | 22.2M/41.5M [00:00<00:00, 73.0MB/s]
     70%|#######   | 29.2M/41.5M [00:00<00:00, 49.1MB/s]
     83%|########3 | 34.6M/41.5M [00:00<00:00, 44.1MB/s]
     96%|#########6| 40.0M/41.5M [00:00<00:00, 42.8MB/s]
    100%|##########| 41.5M/41.5M [00:00<00:00, 50.9MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index 407335fc1..300fb3753 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -94,7 +94,7 @@ Load a pretrained PyTorch model
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     42%|####1     | 18.6M/44.7M [00:00<00:00, 195MB/s]
     85%|########4 | 37.9M/44.7M [00:00<00:00, 200MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 203MB/s]
+
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
      9%|9         | 4.02M/44.7M [00:00<00:01, 42.1MB/s]
     18%|#8        | 8.05M/44.7M [00:00<00:00, 40.9MB/s]
     67%|######6   | 29.9M/44.7M [00:00<00:00, 125MB/s] 
    100%|##########| 44.7M/44.7M [00:00<00:00, 116MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 41252fed6..916c3e46c 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -423,7 +423,7 @@ Run the corresponding model on tensorflow
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  5.479 seconds)
+   **Total running time of the script:** ( 1 minutes  7.617 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index 353228f01..9369af951 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**05:01.573** total execution time for **how_to_compile_models** files:
+**05:14.531** total execution time for **how_to_compile_models** files:
 
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:05.479 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:07.617 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:00.890 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:04.791 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:39.009 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:40.806 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:26.922 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:29.005 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:24.571 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:26.079 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:24.560 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:24.383 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:23.300 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:23.995 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:19.894 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:20.074 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:14.556 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:15.271 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.392 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.510 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index 19af9ed34..ceb766276 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -441,7 +441,7 @@ Execute on TVM
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      16.3400      16.3749      16.9010      15.8284       0.4512   
+      16.6725      16.6659      17.2928      16.0786       0.4463   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index 1ea978518..34c061933 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -123,7 +123,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
      0%|          | 0.00/170M [00:00<?, ?B/s]
     12%|#2        | 21.2M/170M [00:00<00:00, 222MB/s]
     28%|##8       | 47.9M/170M [00:00<00:00, 256MB/s]
     44%|####4     | 75.2M/170M [00:00<00:00, 270MB/s]
     60%|#####9    | 101M/170M [00:00<00:00, 271MB/s] 
     75%|#######5  | 128M/170M [00:00<00:00, 273MB/s]
     91%|######### | 154M/170M [00:00<00:00, 275MB/s]
    100%|##########| 170M/170M [00:00<00:00, 268MB/s]
+
      0%|          | 0.00/170M [00:00<?, ?B/s]
      7%|6         | 11.7M/170M [00:00<00:01, 115MB/s]
     16%|#6        | 27.3M/170M [00:00<00:01, 142MB/s]
     27%|##7       | 46.5M/170M [00:00<00:00, 169MB/s]
     40%|###9      | 67.2M/170M [00:00<00:00, 188MB/s]
     53%|#####3    | 90.7M/170M [00:00<00:00, 209MB/s]
     66%|######6   | 112M/170M [00:00<00:00, 214MB/s] 
     80%|########  | 136M/170M [00:00<00:00, 226MB/s]
     94%|#########4| 160M/170M [00:00<00:00, 234MB/s]
    100%|##########| 170M/170M [00:00<00:00, 208MB/s]
     /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
       for i in range(dim)
     /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -292,7 +292,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  56.292 seconds)
+   **Total running time of the script:** ( 3 minutes  3.849 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index 232e6a21e..303a72f7c 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -232,7 +232,7 @@ training. Other models require a full post training calibration.
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 165MB/s]
+
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 152MB/s]
 
 
 
@@ -412,7 +412,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      90.3487      90.2415      96.5346      90.1097       0.6424   
+      90.3458      90.2961      90.8836      90.1769       0.1406   
                
 
 
@@ -461,7 +461,7 @@ TODO
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  8.511 seconds)
+   **Total running time of the script:** ( 1 minutes  9.946 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index d906318a3..c1eb9de5d 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -439,7 +439,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      119.6078     119.5939     120.5944     118.7345      0.3383   
+      120.4244     120.3765     124.4274     119.7161      0.5119   
                
 
 
@@ -476,7 +476,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  51.782 seconds)
+   **Total running time of the script:** ( 1 minutes  52.525 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index 40a82e668..596c29303 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -255,7 +255,7 @@ We create a Relay VM to build and execute the model.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  18.719 seconds)
+   **Total running time of the script:** ( 1 minutes  26.287 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index 6cf758ef1..ad9b97d3d 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -158,7 +158,7 @@ Convert and compile model for CPU.
             data: None
       input_sym_arg_type = in_param.infer_type()[0]
     Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
      0%|          | 0/132723 [00:00<?, ?KB/s]
      5%|4         | 6141/132723 [00:00<00:02, 61398.49KB/s]
     11%|#         | 13975/132723 [00:00<00:01, 71360.35KB/s]
     17%|#6        | 21946/132723 [00:00<00:01, 75163.93KB/s]
     23%|##2       | 29974/132723 [00:00<00:01, 77179.65KB/s]
     29%|##8       | 37933/132723 [00:00<00:01, 78037.58KB/s]
     35%|###4      | 45998/132723 [00:00<00:01, 78924.47KB/s]
     41%|####      | 53962/132723 [00:00<00:00, 79156.23KB/s]
     47%|####6     | 61982/132723 [00:00<00:00, 79485.31KB/s]
     53%|#####2    | 69931/132723 [00:00<00:00, 79383.92KB/s]
     59%|#####8    | 77945/132723 [00:01<00:00, 79612.51KB/s]
     65%|######4   | 85939/132723 [00:01<00:00, 79711.47KB/s]
     71%|#######   | 93931/132723 [00:01<00:00, 79771.54KB/s]
     77%|#######6  | 101909/132723 [00:01<00:00, 78995.62KB/s]
     83%|########2 | 109908/132723 [00:01<00:00, 79290.01KB/s]
     89%|########8 | 117839/132723 [00:01<00:00, 78900.02KB/s]
     95%|########
 #4| 125776/132723 [00:01<00:00, 79038.21KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 78401.34KB/s]
+
      0%|          | 0/132723 [00:00<?, ?KB/s]
      5%|4         | 6448/132723 [00:00<00:01, 64472.99KB/s]
     11%|#1        | 15048/132723 [00:00<00:01, 77132.28KB/s]
     17%|#7        | 22762/132723 [00:00<00:01, 70546.96KB/s]
     24%|##3       | 31405/132723 [00:00<00:01, 76478.66KB/s]
     30%|###       | 40055/132723 [00:00<00:01, 79963.79KB/s]
     36%|###6      | 48218/132723 [00:00<00:01, 80514.42KB/s]
     42%|####2     | 56309/132723 [00:00<00:01, 73937.70KB/s]
     49%|####8     | 64942/132723 [00:00<00:00, 77609.39KB/s]
     55%|#####4    | 72809/132723 [00:00<00:00, 76322.25KB/s]
     61%|######1   | 81421/132723 [00:01<00:00, 79197.94KB/s]
     67%|######7   | 89406/132723 [00:01<00:00, 78898.33KB/s]
     74%|#######3  | 98029/132723 [00:01<00:00, 81063.33KB/s]
     80%|########  | 106720/132723 [00:01<00:00, 82796.24KB/s]
     87%|########6 | 115029/132723 [00:01<00:00, 80953.18KB/s]
     93%|#########2| 123421/132723 [00:01<00:00, 81821.98KB/s]
    100%|########
 #9| 132186/132723 [00:01<00:00, 83542.16KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 79265.78KB/s]
 
 
 
@@ -241,7 +241,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  31.696 seconds)
+   **Total running time of the script:** ( 2 minutes  36.102 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index 5e1baf177..2c9106a7f 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,22 +5,22 @@
 
 Computation times
 =================
-**10:38.537** total execution time for **how_to_deploy_models** files:
+**11:02.182** total execution time for **how_to_deploy_models** files:
 
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 02:56.292 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:03.849 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 02:31.696 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 02:36.102 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 01:51.782 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 01:52.525 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:18.719 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:26.287 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:08.511 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:09.946 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:29.362 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:30.712 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:22.169 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:22.757 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)                                     | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index 2ace1e85e..7d6740e2c 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -476,7 +476,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.ziped449611-ba6b-4262-abcd-0f67dd37956f from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip37305150-8aaa-4c94-8db0-c7a97f22fa14 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 
 
 
@@ -590,7 +590,7 @@ Now, to actually convert the entire network, we have written `a pass in Relay <h
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-      Check failed: (lower) is false: Intrinsic lowering function for target llvm, intrinsic name tir.sqrt, type 150 not found
+      Check failed: (lower) is false: FloatImm lowering function for target llvm type 150 not found
 
 
 
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index 89968462c..63605b796 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:40.233** total execution time for **how_to_extend_tvm** files:
+**00:41.921** total execution time for **how_to_extend_tvm** files:
 
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:37.095 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:38.654 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.207 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.295 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:00.924 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:00.965 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.007 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.008 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index f25b592c3..25bddf1e6 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -216,10 +216,10 @@ profile the execution time of each passes.
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6612us [6612us] (45.43%; 45.43%)
-    FoldScaleAxis: 7941us [6us] (54.57%; 54.57%)
-            FoldConstant: 7935us [1596us] (54.53%; 99.93%)
-                    InferType: 6339us [6339us] (43.56%; 79.89%)
+    InferType: 6823us [6823us] (45.33%; 45.33%)
+    FoldScaleAxis: 8228us [7us] (54.67%; 54.67%)
+            FoldConstant: 8222us [1659us] (54.62%; 99.92%)
+                    InferType: 6562us [6562us] (43.60%; 79.82%)
 
 
 
@@ -258,10 +258,10 @@ Refer to following sections and :py:func:`tvm.instrument.pass_instrument` for th
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6191us [6191us] (44.33%; 44.33%)
-    FoldScaleAxis: 7775us [4us] (55.67%; 55.67%)
-            FoldConstant: 7770us [1629us] (55.64%; 99.94%)
-                    InferType: 6141us [6141us] (43.97%; 79.03%)
+    InferType: 6569us [6569us] (44.87%; 44.87%)
+    FoldScaleAxis: 8071us [6us] (55.13%; 55.13%)
+            FoldConstant: 8065us [1679us] (55.09%; 99.93%)
+                    InferType: 6387us [6387us] (43.63%; 79.19%)
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index f06d2e79a..19a046367 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -340,7 +340,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 54.167493 ms
+    Convolution: 54.117075 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index 2629dda2d..1c2486587 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -671,7 +671,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 6.615088 ms
+    conv2d with tensor core: 13.361453 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index 75c6d19b3..6a8f8a947 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -143,8 +143,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.018717
-    Baseline: 3.340915
+    Numpy running time: 0.017869
+    Baseline: 3.290199
 
 
 
@@ -239,7 +239,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.294002
+    Opt1: 0.309832
 
 
 
@@ -342,7 +342,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.333302
+    Opt2: 0.346444
 
 
 
@@ -438,7 +438,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.118043
+    Opt3: 0.118942
 
 
 
@@ -563,7 +563,7 @@ flattening.
 
  .. code-block:: none
 
-    Opt4: 0.110974
+    Opt4: 0.109270
 
 
 
@@ -685,7 +685,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.112042
+    Opt5: 0.103795
 
 
 
@@ -810,7 +810,7 @@ Futhermore, we can also utilize multi-core processors to do the thread-level par
 
  .. code-block:: none
 
-    Opt6: 0.145295
+    Opt6: 0.144459
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index e66ec7217..2d6bfc8ce 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:34.252** total execution time for **how_to_optimize_operators** files:
+**00:34.305** total execution time for **how_to_optimize_operators** files:
 
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:31.922 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:31.852 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.296 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.379 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.034 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.073 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index 888a791ed..2e9b591c0 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,18 +5,18 @@
 
 Computation times
 =================
-**05:58.178** total execution time for **how_to_tune_with_autoscheduler** files:
+**06:01.892** total execution time for **how_to_tune_with_autoscheduler** files:
 
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 03:11.755 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 03:14.483 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:22.220 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:23.600 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 00:45.922 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 00:46.497 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:20.657 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:19.399 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:08.842 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:09.054 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:08.782 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:08.859 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index f532b2c75..93e2ebfed 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -240,483 +240,105 @@ cooperative fetching, unrolling and operator fusion.
                  compute: Buffer(compute_2: Pointer(float32), float32, [25088], [])}
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
       preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
-      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 28;
-      allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
-      allocate(pad_temp.shared: Pointer(shared float32), float32, [72]), storage_scope = shared;
-      allocate(kernel.shared: Pointer(shared float32), float32, [3072]), storage_scope = shared;
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64 {
-        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope="local", align=32)[0] = 0f32
+      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 64;
+      allocate(conv2d_nchw: Pointer(local float32), float32, [7]), storage_scope = local;
+      allocate(pad_temp.shared: Pointer(shared float32), float32, [1008]), storage_scope = shared;
+      allocate(kernel.shared: Pointer(shared float32), float32, [384]), storage_scope = shared;
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56 {
+        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [7], [], scope="local", align=16)[0] = 0f32
         conv2d_nchw_1[1] = 0f32
         conv2d_nchw_1[2] = 0f32
         conv2d_nchw_1[3] = 0f32
         conv2d_nchw_1[4] = 0f32
         conv2d_nchw_1[5] = 0f32
         conv2d_nchw_1[6] = 0f32
-        conv2d_nchw_1[7] = 0f32
-        conv2d_nchw_1[8] = 0f32
-        conv2d_nchw_1[9] = 0f32
-        conv2d_nchw_1[10] = 0f32
-        conv2d_nchw_1[11] = 0f32
-        conv2d_nchw_1[12] = 0f32
-        conv2d_nchw_1[13] = 0f32
-        for (rc.outer.outer: int32, 0, 64) {
+        for (rc.outer.outer: int32, 0, 32) {
           for (ry.outer.outer: int32, 0, 3) {
-            let cse_var_2: int32 = (rc.outer.outer*72)
+            let cse_var_4: int32 = (rc.outer.outer*784)
+            let cse_var_3: int32 = (ry.outer.outer*7)
+            let cse_var_2: int32 = (rc.outer.outer*144)
             let cse_var_1: int32 = (ry.outer.outer*3)
              {
-              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64 {
-                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
-                  pad_temp.shared_1: Buffer(pad_temp.shared, float32, [72], [], scope="shared")[(threadIdx.x_1*4)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv((threadIdx.x_1*4), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f3 [...]
-                }
-                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
-                  pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 1), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
-                }
-                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
-                  pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 2), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
-                }
-                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
-                  pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 9))) && (floormod(((threadIdx.x_1*4) + 3), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 3), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0f32, dtype=float32)
-                }
+              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [1008], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else(((((1 <= (floordiv(threadIdx.x_1, 9) + ry.outer.outer)) && ((floordiv(threadIdx.x_1, 9) + ry.outer.outer) < 8)) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), data[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 56)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 56), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 112)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 112), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 168)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 168), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 224)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 224), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 280)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 1), 9))) && (floormod((threadIdx.x_1 + 1), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 280), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 336)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 336), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 392), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 448)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 448), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 504)] = @tir.if_then_else(((((1 <= (floordiv(threadIdx.x_1, 9) + ry.outer.outer)) && ((floordiv(threadIdx.x_1, 9) + ry.outer.outer) < 8)) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), data[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) + 384)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 560)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 2), 9))) && (floormod((threadIdx.x_1 + 2), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 560), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 616)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 4), 9))) && (floormod((threadIdx.x_1 + 4), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 616), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 672)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 6), 9))) && (floormod((threadIdx.x_1 + 6), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 672), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 728)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 728), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 784)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 1), 9))) && (floormod((threadIdx.x_1 + 1), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 784), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 840)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 3), 9))) && (floormod((threadIdx.x_1 + 3), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 840), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 896)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 5), 9))) && (floormod((threadIdx.x_1 + 5), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 896), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              pad_temp.shared_1[(threadIdx.x_1 + 952)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 952), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
+              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1: Buffer(kernel.shared, float32, [384], [], scope="shared")[threadIdx.x_2] = kernel[((((((blockIdx.x*36864) + (floordiv(threadIdx.x_2, 48)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 48), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 56)] = kernel[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 56), 48)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 48), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 112), 48)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 48), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 168)] = kernel[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 168), 48)*4608)) + cse_var_2) + (floormod((floordiv(threadIdx.x_2, 3) + 8), 16)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 224), 48)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 32), 48), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              kernel.shared_1[(threadIdx.x_2 + 280)] = kernel[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 280), 48)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 40), 48), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 56;
+              if @tir.likely((threadIdx.x_2 < 48), dtype=bool) {
+                kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((((blockIdx.x*36864) + cse_var_2) + (floordiv(threadIdx.x_2, 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 32256)]
+              }
+              for (rc.outer.inner: int32, 0, 16) {
+                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*63) + floormod(threadIdx.x, 7))]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 9)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 18)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 27)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 28)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 29)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 36)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 37)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 38)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 45)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 46)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 47)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 54)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 55)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 56)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
               }
-              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1: Buffer(kernel.shared, float32, [3072], [], scope="shared")[threadIdx.x_2] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 64)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 64), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 128)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 128), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 192)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 36864)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 256)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 256), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 320)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 320), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 384)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 73728)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 448), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 512)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 512), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 576)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 110592)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 640)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 640), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 704)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 704), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 768)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 147456)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 832)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 832), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 896), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 960)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 184320)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1024), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1088), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1152)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 221184)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1216)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1216), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1280)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1280), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1408)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1408), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1472)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1472), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1536)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 294912)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1600)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1600), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1664)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1664), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1728)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 331776)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1792)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1792), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1856)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1856), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1920)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 368640)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1984)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1984), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2048)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2048), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2112)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 405504)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2176)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2176), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2240)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2240), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2304)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 442368)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2368)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2368), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2432)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2432), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2496)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 479232)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2560)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2560), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2624)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2624), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2688)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 516096)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2752)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2752), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2816)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2816), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2880)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 552960)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2944)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2944), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 3008)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 3008), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 47)]))
             }
           }
         }
-        for (i1.inner: int32, 0, 2) {
-          for (i3.inner: int32, 0, 7) {
-            compute[(((((floordiv(blockIdx.x, 7)*6272) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((floordiv(blockIdx.x, 7)*128) + (threadIdx.x*2)) + i1.inner)]), 0f32)
-          }
+        for (i2.inner: int32, 0, 7) {
+          compute[((((blockIdx.x*392) + (floordiv(threadIdx.x, 7)*49)) + (i2.inner*7)) + floormod(threadIdx.x, 7))] = max((conv2d_nchw_1[i2.inner] + bias[((blockIdx.x*8) + floordiv(threadIdx.x, 7))]), 0f32)
         }
       }
     }
@@ -771,7 +393,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.358 ms
+    Execution time of this operator: 0.287 ms
 
 
 
@@ -820,35 +442,35 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
     conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
-    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
+    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
+    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
     conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
     conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
-    conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
+    conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=7)
     conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
     conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
     conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
-    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
+    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
+    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
     conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
-    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
+    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=1)
+    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=16)
     conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
     conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
-    conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
-    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
+    conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=3)
+    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
     s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2 [...]
     compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
     compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
     compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
-    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
+    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
     compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
-    compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
+    compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=7)
     compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
     compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
-    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
+    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
+    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
     compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
     s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
     s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -868,14 +490,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
     s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
     s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
-    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 512)
+    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 64)
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
 
     CUDA source code:
@@ -893,10 +515,10 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       #define int64_t long long
       #define uint64_t unsigned long long
     #endif
-    extern "C" __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-      float conv2d_nchw[14];
-      __shared__ float pad_temp_shared[72];
-      __shared__ float kernel_shared[3072];
+    extern "C" __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+      float conv2d_nchw[7];
+      __shared__ float pad_temp_shared[1008];
+      __shared__ float kernel_shared[384];
       conv2d_nchw[0] = 0.000000e+00f;
       conv2d_nchw[1] = 0.000000e+00f;
       conv2d_nchw[2] = 0.000000e+00f;
@@ -904,419 +526,64 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       conv2d_nchw[4] = 0.000000e+00f;
       conv2d_nchw[5] = 0.000000e+00f;
       conv2d_nchw[6] = 0.000000e+00f;
-      conv2d_nchw[7] = 0.000000e+00f;
-      conv2d_nchw[8] = 0.000000e+00f;
-      conv2d_nchw[9] = 0.000000e+00f;
-      conv2d_nchw[10] = 0.000000e+00f;
-      conv2d_nchw[11] = 0.000000e+00f;
-      conv2d_nchw[12] = 0.000000e+00f;
-      conv2d_nchw[13] = 0.000000e+00f;
-      for (int rc_outer_outer = 0; rc_outer_outer < 64; ++rc_outer_outer) {
+      for (int rc_outer_outer = 0; rc_outer_outer < 32; ++rc_outer_outer) {
         for (int ry_outer_outer = 0; ry_outer_outer < 3; ++ry_outer_outer) {
           __syncthreads();
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) * 4) % 9))) && (((((int)threadIdx.x) * 4) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
-          }
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 1) % 9))) && ((((((int)threadIdx.x) * 4) + 1) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
-          }
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 2) % 9))) && ((((((int)threadIdx.x) * 4) + 2) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[((int)threadIdx.x)] = (((((1 <= ((((int)threadIdx.x) / 9) + ry_outer_outer)) && (((((int)threadIdx.x) / 9) + ry_outer_outer) < 8)) && (1 <= (((int)threadIdx.x) % 9))) && ((((int)threadIdx.x) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 56)] = (((((1 <= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 56) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 112)] = (((((1 <= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 112) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 168)] = (((((1 <= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 168) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 224)] = (((((1 <= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 224) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 280)] = (((((1 <= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 1) % 9))) && (((((int)threadIdx.x) + 1) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 280) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 336)] = (((((1 <= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 336) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 392)] = (((((1 <= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 392) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 448)] = (((((1 <= (((((int)threadIdx.x) + 7) / 9) + ry_outer_outer)) && ((((((int)threadIdx.x) + 7) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 448) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 504)] = (((((1 <= ((((int)threadIdx.x) / 9) + ry_outer_outer)) && (((((int)threadIdx.x) / 9) + ry_outer_outer) < 8)) && (1 <= (((int)threadIdx.x) % 9))) && ((((int)threadIdx.x) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) + 384)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 560)] = (((((1 <= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 2) % 9))) && (((((int)threadIdx.x) + 2) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 560) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 616)] = (((((1 <= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 4) % 9))) && (((((int)threadIdx.x) + 4) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 616) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 672)] = (((((1 <= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 6) % 9))) && (((((int)threadIdx.x) + 6) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 672) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 728)] = (((((1 <= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 728) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 784)] = (((((1 <= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 1) % 9))) && (((((int)threadIdx.x) + 1) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 784) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 840)] = (((((1 <= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 3) % 9))) && (((((int)threadIdx.x) + 3) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 840) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 896)] = (((((1 <= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 5) % 9))) && (((((int)threadIdx.x) + 5) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 896) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[(((int)threadIdx.x) + 952)] = (((((1 <= (((((int)threadIdx.x) + 7) / 9) + ry_outer_outer)) && ((((((int)threadIdx.x) + 7) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 952) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
+          kernel_shared[((int)threadIdx.x)] = kernel[((((((((int)blockIdx.x) * 36864) + ((((int)threadIdx.x) / 48) * 4608)) + (rc_outer_outer * 144)) + (((((int)threadIdx.x) % 48) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 56)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 56) / 48) * 4608)) + (rc_outer_outer * 144)) + ((((((int)threadIdx.x) + 8) % 48) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 112)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 112) / 48) * 4608)) + (rc_outer_outer * 144)) + ((((((int)threadIdx.x) + 16) % 48) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 168)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 168) / 48) * 4608)) + (rc_outer_outer * 144)) + ((((((int)threadIdx.x) / 3) + 8) & 15) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 224)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 224) / 48) * 4608)) + (rc_outer_outer * 144)) + ((((((int)threadIdx.x) + 32) % 48) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 280)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 280) / 48) * 4608)) + (rc_outer_outer * 144)) + ((((((int)threadIdx.x) + 40) % 48) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          if (((int)threadIdx.x) < 48) {
+            kernel_shared[(((int)threadIdx.x) + 336)] = kernel[((((((((int)blockIdx.x) * 36864) + (rc_outer_outer * 144)) + ((((int)threadIdx.x) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 32256)];
           }
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 3) % 9))) && ((((((int)threadIdx.x) * 4) + 3) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
-          }
-          kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
-          kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
-          kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
-          kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
-          kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
-          kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
-          kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
-          kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
-          kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
-          kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
-          kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
-          kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
-          kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
-          kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
-          kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
-          kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
           __syncthreads();
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          for (int rc_outer_inner = 0; rc_outer_inner < 16; ++rc_outer_inner) {
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 63) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 9)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 18)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 27)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 28)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 29)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 36)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 37)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 38)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 45)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 46)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 47)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 54)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 55)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 56)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+          }
         }
       }
-      for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
-        for (int i3_inner = 0; i3_inner < 7; ++i3_inner) {
-          compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
-        }
+      for (int i2_inner = 0; i2_inner < 7; ++i2_inner) {
+        compute[((((((int)blockIdx.x) * 392) + ((((int)threadIdx.x) / 7) * 49)) + (i2_inner * 7)) + (((int)threadIdx.x) % 7))] = max((conv2d_nchw[i2_inner] + bias[((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 7))]), 0.000000e+00f);
       }
     }
 
@@ -1378,7 +645,7 @@ In the example below we resume the status and do more 5 trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  11.755 seconds)
+   **Total running time of the script:** ( 3 minutes  14.483 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index 64c42a1d3..e7993e5c1 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -647,7 +647,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      10.0768      10.1030      10.1199      10.0076       0.0494   
+       9.8601       9.8492       9.8942       9.8368       0.0246   
                
 
 
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index f63aca3cb..fdeb5d24b 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -666,7 +666,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      760.8700     760.6951     761.9084     760.0065      0.7862   
+      768.2140     768.1146     768.6868     767.8406      0.3525   
                
 
 
@@ -694,7 +694,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  22.220 seconds)
+   **Total running time of the script:** ( 1 minutes  23.600 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index 4298ac3aa..a64e615de 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -397,77 +397,75 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
                  placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
       buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-      preflattened_buffer_map = {placeholder_8: placeholder_15: Buffer(placeholder_13, int32, [33], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_9: placeholder_17: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_7: placeholder_18: Buffer(placeholder_12, int32, [4916], []), placeholder_6: placeholder_19: Buffer(placeholder_11, float32, [4916, 16, 1], [])} {
-      for (i0.outer.i1.outer.fused: int32, 0, 128) "parallel" {
-        allocate(compute_4: Pointer(global float32), float32, [512]), storage_scope = global {
-          for (i.outer.inner: int32, 0, 2) {
-            for (nb_j.inner: int32, 0, 2) {
-              for (i.inner.init: int32, 0, 8) {
-                let cse_var_1: int32 = (((i.outer.inner*256) + (i.inner.init*32)) + (nb_j.inner*16))
-                 {
-                  compute_5: Buffer(compute_4, float32, [512], [])[cse_var_1] = 0f32
-                  compute_5[(cse_var_1 + 1)] = 0f32
-                  compute_5[(cse_var_1 + 2)] = 0f32
-                  compute_5[(cse_var_1 + 3)] = 0f32
-                  compute_5[(cse_var_1 + 4)] = 0f32
-                  compute_5[(cse_var_1 + 5)] = 0f32
-                  compute_5[(cse_var_1 + 6)] = 0f32
-                  compute_5[(cse_var_1 + 7)] = 0f32
-                  compute_5[(cse_var_1 + 8)] = 0f32
-                  compute_5[(cse_var_1 + 9)] = 0f32
-                  compute_5[(cse_var_1 + 10)] = 0f32
-                  compute_5[(cse_var_1 + 11)] = 0f32
-                  compute_5[(cse_var_1 + 12)] = 0f32
-                  compute_5[(cse_var_1 + 13)] = 0f32
-                  compute_5[(cse_var_1 + 14)] = 0f32
-                  compute_5[(cse_var_1 + 15)] = 0f32
-                }
+      preflattened_buffer_map = {placeholder_7: placeholder_15: Buffer(placeholder_12, int32, [4916], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_9: placeholder_18: Buffer(placeholder_14, float32, [128, 512], []), placeholder_6: placeholder_19: Buffer(placeholder_11, float32, [4916, 16, 1], [])} {
+      for (i0.outer.i1.outer.fused: int32, 0, 256) "parallel" {
+        allocate(compute_4: Pointer(global float32), float32, [256]), storage_scope = global {
+          for (nb_j.inner: int32, 0, 2) {
+            for (i.inner.init: int32, 0, 8) {
+              let cse_var_1: int32 = ((i.inner.init*32) + (nb_j.inner*16))
+               {
+                compute_5: Buffer(compute_4, float32, [256], [])[cse_var_1] = 0f32
+                compute_5[(cse_var_1 + 1)] = 0f32
+                compute_5[(cse_var_1 + 2)] = 0f32
+                compute_5[(cse_var_1 + 3)] = 0f32
+                compute_5[(cse_var_1 + 4)] = 0f32
+                compute_5[(cse_var_1 + 5)] = 0f32
+                compute_5[(cse_var_1 + 6)] = 0f32
+                compute_5[(cse_var_1 + 7)] = 0f32
+                compute_5[(cse_var_1 + 8)] = 0f32
+                compute_5[(cse_var_1 + 9)] = 0f32
+                compute_5[(cse_var_1 + 10)] = 0f32
+                compute_5[(cse_var_1 + 11)] = 0f32
+                compute_5[(cse_var_1 + 12)] = 0f32
+                compute_5[(cse_var_1 + 13)] = 0f32
+                compute_5[(cse_var_1 + 14)] = 0f32
+                compute_5[(cse_var_1 + 15)] = 0f32
               }
-              for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
-                for (i.inner: int32, 0, 8) {
-                  let cse_var_21: int32 = (elem_idx*16)
-                  let cse_var_20: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
-                  let cse_var_19: int32 = (((i.outer.inner*256) + (i.inner*32)) + (nb_j.inner*16))
-                  let cse_var_18: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i.outer.inner*2048)) + (i.inner*256))
-                  let cse_var_17: int32 = (cse_var_19 + 9)
-                  let cse_var_16: int32 = (cse_var_19 + 8)
-                  let cse_var_15: int32 = (cse_var_19 + 7)
-                  let cse_var_14: int32 = (cse_var_19 + 6)
-                  let cse_var_13: int32 = (cse_var_19 + 5)
-                  let cse_var_12: int32 = (cse_var_19 + 4)
-                  let cse_var_11: int32 = (cse_var_19 + 3)
-                  let cse_var_10: int32 = (cse_var_19 + 2)
-                  let cse_var_9: int32 = (cse_var_19 + 15)
-                  let cse_var_8: int32 = (cse_var_19 + 14)
-                  let cse_var_7: int32 = (cse_var_19 + 13)
-                  let cse_var_6: int32 = (cse_var_19 + 12)
-                  let cse_var_5: int32 = (cse_var_19 + 11)
-                  let cse_var_4: int32 = (cse_var_19 + 10)
-                  let cse_var_3: int32 = (cse_var_19 + 1)
-                   {
-                    compute_5[cse_var_19] = (compute_5[cse_var_19] + (placeholder_1[((placeholder_3[cse_var_20]*16) + cse_var_21)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                    compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                  }
+            }
+            for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
+              for (i.inner: int32, 0, 8) {
+                let cse_var_21: int32 = (elem_idx*16)
+                let cse_var_20: int32 = ((i.inner*32) + (nb_j.inner*16))
+                let cse_var_19: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
+                let cse_var_18: int32 = ((floordiv(i0.outer.i1.outer.fused, 16)*2048) + (i.inner*256))
+                let cse_var_17: int32 = (cse_var_20 + 9)
+                let cse_var_16: int32 = (cse_var_20 + 8)
+                let cse_var_15: int32 = (cse_var_20 + 7)
+                let cse_var_14: int32 = (cse_var_20 + 6)
+                let cse_var_13: int32 = (cse_var_20 + 5)
+                let cse_var_12: int32 = (cse_var_20 + 4)
+                let cse_var_11: int32 = (cse_var_20 + 3)
+                let cse_var_10: int32 = (cse_var_20 + 2)
+                let cse_var_9: int32 = (cse_var_20 + 15)
+                let cse_var_8: int32 = (cse_var_20 + 14)
+                let cse_var_7: int32 = (cse_var_20 + 13)
+                let cse_var_6: int32 = (cse_var_20 + 12)
+                let cse_var_5: int32 = (cse_var_20 + 11)
+                let cse_var_4: int32 = (cse_var_20 + 10)
+                let cse_var_3: int32 = (cse_var_20 + 1)
+                 {
+                  compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[((placeholder_3[cse_var_19]*16) + cse_var_21)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+                  compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
                 }
               }
             }
           }
-          for (i0.inner: int32, 0, 16) {
-            let cse_var_22: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*8192) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
+          for (i0.inner: int32, 0, 8) {
+            let cse_var_22: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
             compute[ramp(cse_var_22, 1, 32)] = max((compute_5[ramp((i0.inner*32), 1, 32)] + placeholder_4[ramp(cse_var_22, 1, 32)]), broadcast(0f32, 32))
           }
         }
@@ -524,7 +522,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 1.861 ms
+    Execution time of this operator: 1.795 ms
 
 
 
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index a40b1a456..82782d00e 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:43.743** total execution time for **how_to_tune_with_autotvm** files:
+**00:44.036** total execution time for **how_to_tune_with_autotvm** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:43.708 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:44.000 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.020 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index 3860ba2bd..81d088633 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -892,8 +892,8 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 4, 32]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2885496
-    No: 6   GFLOPS: 42.35/42.35     result: MeasureResult(costs=(0.005466809578947368,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.624403715133667, timestamp=1657994920.9398527)        [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
-    No: 7   GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 6   GFLOPS: 96.87/96.87     result: MeasureResult(costs=(0.00238975975,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6457629203796387, timestamp=1658166893.5703437)      [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
+    No: 7   GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1016,7 +1016,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 16, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 256, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6225319
-    No: 8   GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 8   GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1139,7 +1139,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,943546
-    No: 9   GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 9   GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1262,7 +1262,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 16, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 16, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2868708
-    No: 10  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 10  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
         res = future.result()
       File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
@@ -1280,7 +1280,7 @@ for this template
     TimeoutError
 
             [('tile_f', [-1, 32, 2, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4691833
-    No: 11  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 11  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1403,7 +1403,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 2, 64]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,1042124
-    No: 12  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 12  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1526,7 +1526,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 16]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10013405
-    No: 13  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 13  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1649,7 +1649,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 8, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6732082
-    No: 14  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 14  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1772,7 +1772,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 4, 32]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7536735
-    No: 15  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 15  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1895,7 +1895,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 128, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,482121
-    No: 16  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 16  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2018,7 +2018,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2824525
-    No: 17  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 17  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2141,7 +2141,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4559286
-    No: 18  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 18  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2264,7 +2264,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 32, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9677544
-    No: 19  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+    No: 19  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 738, in __call__
         yield remote, remote.load_module(os.path.split(build_result.filename)[1])
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 702, in run_through_rpc
@@ -2352,7 +2352,7 @@ for this template
       15: _PyEval_EvalFrameDefault
       14: 0x0000000000537c30
       13: _PyObject_FastCallKeywords
-      12: 0x00007f3cfa5bffa2
+      12: 0x00007f51fe81dfa2
       11: _ctypes_callproc
       10: ffi_call
       9: ffi_call_unix64
@@ -2417,7 +2417,7 @@ for this template
       21: _PyFunction_FastCallKeywords
       20: _PyEval_EvalFrameDefault
       19: _PyFunction_FastCall      [('tile_f', [-1, 8, 2, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6390073
-    No: 20  GFLOPS: 144.60/144.60   result: MeasureResult(costs=(0.00160099481,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.429844617843628, timestamp=1657994947.512822)        [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
+    No: 20  GFLOPS: 144.94/144.94   result: MeasureResult(costs=(0.00159719953,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4513959884643555, timestamp=1658166920.2266512)      [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
 
 
 
@@ -2474,7 +2474,7 @@ and measure running time.
     Best config:
     [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
     Finish loading 20 records
-    Time cost of this operator: 0.002011
+    Time cost of this operator: 0.002046
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index 4a553d27d..4dcc7f5fe 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -329,10 +329,10 @@ Timing the untuned program
     ########## Build without Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  312.9     98.721   (1, 2, 10, 10, 3)  2       1        [312.9]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.094     0.976    (1, 6, 10, 10)     1       1        [3.094]           
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.961     0.303    (1, 1, 10, 10, 3)  1       1        [0.961]           
-    Total_time                                    -                                             316.954   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  309.4     98.727   (1, 2, 10, 10, 3)  2       1        [309.4]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.056     0.975    (1, 6, 10, 10)     1       1        [3.056]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.933     0.298    (1, 1, 10, 10, 3)  1       1        [0.933]           
+    Total_time                                    -                                             313.389   -        -                  -       -        -                 
 
 
 
@@ -398,10 +398,10 @@ Timing the tuned program
     ########## Build with Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  118.4     97.735   (1, 6, 10, 10, 1)  2       1        [118.4]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.772     1.462    (1, 6, 10, 10)     1       1        [1.772]           
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.972     0.802    (1, 1, 10, 10, 3)  1       1        [0.972]           
-    Total_time                                    -                                             121.144   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  120.7     97.674   (1, 6, 10, 10, 1)  2       1        [120.7]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.943     1.572    (1, 6, 10, 10)     1       1        [1.943]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.931     0.754    (1, 1, 10, 10, 3)  1       1        [0.931]           
+    Total_time                                    -                                             123.574   -        -                  -       -        -                 
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
index 7fe1eee79..036620a05 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
@@ -225,7 +225,7 @@ take about **2 minutes** to download the Stanford Cars, while COCO 2017 validati
  .. code-block:: none
 
 
-    '/tmp/tmp6zkkzdua/images/random'
+    '/tmp/tmpd9td4hce/images/random'
 
 
 
@@ -325,8 +325,8 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
  .. code-block:: none
 
-    /tmp/tmp6zkkzdua/images/target contains 8144 images
-    /tmp/tmp6zkkzdua/images/random contains 5000 images
+    /tmp/tmpd9td4hce/images/target contains 8144 images
+    /tmp/tmpd9td4hce/images/random contains 5000 images
 
 
 
@@ -501,13 +501,13 @@ the time on our validation set).
  .. code-block:: none
 
     Epoch 1/3
-    328/328 - 55s - loss: 0.2461 - accuracy: 0.9179 - val_loss: 0.1287 - val_accuracy: 0.9607
+    328/328 - 54s - loss: 0.2192 - accuracy: 0.9238 - val_loss: 0.1492 - val_accuracy: 0.9551
     Epoch 2/3
-    328/328 - 52s - loss: 0.1059 - accuracy: 0.9601 - val_loss: 0.1205 - val_accuracy: 0.9649
+    328/328 - 50s - loss: 0.1006 - accuracy: 0.9611 - val_loss: 0.1121 - val_accuracy: 0.9641
     Epoch 3/3
-    328/328 - 52s - loss: 0.0725 - accuracy: 0.9718 - val_loss: 0.1118 - val_accuracy: 0.9641
+    328/328 - 51s - loss: 0.0702 - accuracy: 0.9738 - val_loss: 0.1290 - val_accuracy: 0.9547
 
-    <keras.callbacks.History object at 0x7f053fd2ad50>
+    <keras.callbacks.History object at 0x7f397ddf9090>
 
 
 
@@ -864,7 +864,7 @@ Arduino tutorial for how to do that `on GitHub <https://github.com/guberti/tvm-a
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 4 minutes  40.304 seconds)
+   **Total running time of the script:** ( 4 minutes  52.807 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_train.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index 2adcea7b5..b162c1336 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**05:29.397** total execution time for **how_to_work_with_microtvm** files:
+**05:40.707** total execution time for **how_to_work_with_microtvm** files:
 
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 04:40.304 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 04:52.807 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:45.646 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:44.473 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.446 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.425 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)             | 00:00.001 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index 1c7845fb7..14ba6c880 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:11.496** total execution time for **how_to_work_with_relay** files:
+**00:11.637** total execution time for **how_to_work_with_relay** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``) | 00:09.940 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``) | 00:10.115 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                   | 00:01.549 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                   | 00:01.516 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)       | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
index 726164548..2cdac8e60 100644
--- a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
@@ -261,7 +261,7 @@ The following example customizes CUDA lowering rule for :code:`exp`.
  .. code-block:: none
 
 
-    <function my_cuda_math_rule at 0x7f04c30e39e0>
+    <function my_cuda_math_rule at 0x7f38e432ab00>
 
 
 
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index 215d32c7d..50e5d84d8 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,22 +5,22 @@
 
 Computation times
 =================
-**00:04.053** total execution time for **how_to_work_with_schedules** files:
+**00:04.378** total execution time for **how_to_work_with_schedules** files:
 
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:01.873 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:02.027 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:00.967 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:01.049 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.523 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.567 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.507 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.550 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.100 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.102 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.040 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.027 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.029 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)               | 00:00.015 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index 4723dd583..fc136f70d 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -347,7 +347,7 @@ The importing needs to happen before the tensorized GEMV being executed.
                  C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C}
       preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmptfa1kiz5/input0.cc'\nsource_filename = \"/tmp/tmptfa1kiz5/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpknbs3_1z/input0.cc'\nsource_filename = \"/tmp/tmpknbs3_1z/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
       for (i, 0, 1024) {
         for (j.outer: int32, 0, 32) {
           @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index 83cb82b18..95dd7eea6 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:21.291** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:22.772** total execution time for **topic_vta_tutorials_autotvm** files:
 
 +---------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:21.285 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:22.765 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)     | 00:00.007 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index 7e085f32e..38228f3f6 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -291,7 +291,7 @@ The compilation steps are:
       DeprecationWarning,
     /workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the  new recommended usage.
       relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
-    resnet18_v1 inference graph built in 23.03s!
+    resnet18_v1 inference graph built in 23.84s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index edf714b3a..b38243974 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -335,7 +335,7 @@ The compilation steps are:
       "target_host parameter is going to be deprecated. "
     /workspace/python/tvm/relay/build_module.py:411: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
       DeprecationWarning,
-    yolov3-tiny inference graph built in 15.96s!
+    yolov3-tiny inference graph built in 16.37s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index 5b94f00d0..4319e391f 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**01:31.742** total execution time for **topic_vta_tutorials_frontend** files:
+**01:33.623** total execution time for **topic_vta_tutorials_frontend** files:
 
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:48.413 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:49.397 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:43.329 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:44.227 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index 00aca335b..5e35af424 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:03.259** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.284** total execution time for **topic_vta_tutorials_optimize** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.864 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.854 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.395 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.429 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index 9b8b6a701..53befdc4e 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:00.695** total execution time for **topic_vta_tutorials** files:
+**00:00.803** total execution time for **topic_vta_tutorials** files:
 
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.362 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.433 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.333 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.370 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index 38d7a010e..b10046a1d 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -328,7 +328,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 93.849 ms
+    Execution time of this operator: 94.345 ms
 
 
 
@@ -446,7 +446,7 @@ operations.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  3.827 seconds)
+   **Total running time of the script:** ( 1 minutes  5.379 seconds)
 
 
 .. _sphx_glr_download_tutorial_auto_scheduler_matmul_x86.py:
diff --git a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
index d6586912a..adfb78f88 100644
--- a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
@@ -462,16 +462,16 @@ reduce variance, we take 5 measurements and average them.
     waiting for device...
     device available
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 9.95/9.95       result: MeasureResult(costs=(0.0269827518,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5653500556945801, timestamp=1657993790.9904552)       [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
-    No: 2   GFLOPS: 2.61/9.95       result: MeasureResult(costs=(0.1026655072,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7924566268920898, timestamp=1657993792.7958822)       [('tile_y', [-1, 4]), ('tile_x', [-1, 8])],None,32
-    No: 3   GFLOPS: 11.76/11.76     result: MeasureResult(costs=(0.022822672600000003,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.553189754486084, timestamp=1657993793.8561041)        [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
-    No: 4   GFLOPS: 1.85/11.76      result: MeasureResult(costs=(0.1448757596,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.429588794708252, timestamp=1657993796.8633137)        [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
-    No: 5   GFLOPS: 3.61/11.76      result: MeasureResult(costs=(0.074365281,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.325679063796997, timestamp=1657993798.3228643) [('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
-    No: 6   GFLOPS: 1.72/11.76      result: MeasureResult(costs=(0.15626918039999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.6226701736450195, timestamp=1657993801.5201585)        [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
-    No: 7   GFLOPS: 0.86/11.76      result: MeasureResult(costs=(0.31273552060000004,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.121850490570068, timestamp=1657993806.6852708) [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
-    No: 8   GFLOPS: 10.54/11.76     result: MeasureResult(costs=(0.0254795746,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5479702949523926, timestamp=1657993807.254921)        [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
-    No: 9   GFLOPS: 1.90/11.76      result: MeasureResult(costs=(0.1414890418,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.359858989715576, timestamp=1657993809.735181) [('tile_y', [-1, 2]), ('tile_x', [-1, 2])],None,11
-    No: 10  GFLOPS: 2.72/11.76      result: MeasureResult(costs=(0.0985276802,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6729373931884766, timestamp=1657993811.465475)        [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
+    No: 1   GFLOPS: 8.12/8.12       result: MeasureResult(costs=(0.033047602999999995,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6638576984405518, timestamp=1658165722.968305)        [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
+    No: 2   GFLOPS: 2.56/8.12       result: MeasureResult(costs=(0.10500200879999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8327655792236328, timestamp=1658165724.8144603)        [('tile_y', [-1, 4]), ('tile_x', [-1, 8])],None,32
+    No: 3   GFLOPS: 11.75/11.75     result: MeasureResult(costs=(0.022842748599999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.548677921295166, timestamp=1658165725.8876626)        [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
+    No: 4   GFLOPS: 1.86/11.75      result: MeasureResult(costs=(0.144233974,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.419346809387207, timestamp=1658165728.9034863) [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
+    No: 5   GFLOPS: 3.65/11.75      result: MeasureResult(costs=(0.07357117319999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.319528341293335, timestamp=1658165730.3538802) [('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
+    No: 6   GFLOPS: 1.78/11.75      result: MeasureResult(costs=(0.15079883219999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.539217472076416, timestamp=1658165733.4705992) [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
+    No: 7   GFLOPS: 0.87/11.75      result: MeasureResult(costs=(0.30862258480000004,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.057675361633301, timestamp=1658165738.57236)   [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
+    No: 8   GFLOPS: 10.75/11.75     result: MeasureResult(costs=(0.0249610402,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5476598739624023, timestamp=1658165739.1377902)       [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
+    No: 9   GFLOPS: 1.90/11.75      result: MeasureResult(costs=(0.14125019440000003,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.3684725761413574, timestamp=1658165741.6288157)        [('tile_y', [-1, 2]), ('tile_x', [-1, 2])],None,11
+    No: 10  GFLOPS: 2.57/11.75      result: MeasureResult(costs=(0.10448357940000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7850146293640137, timestamp=1658165743.4649265)        [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
 
 
 
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index 44859ab79..a35da1ab9 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -327,7 +327,7 @@ standard deviation.
 
  .. code-block:: none
 
-    {'mean': 495.4212594799992, 'median': 495.1658676500074, 'std': 0.8481904206234964}
+    {'mean': 499.04213453998636, 'median': 498.90793589997884, 'std': 0.6439924833093513}
 
 
 
@@ -563,31 +563,31 @@ the tuning data to.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.46/  17.46 GFLOPS | Progress: (4/20) | 6.29 s
    [Task  1/25]  Current/Best:    6.15/  17.46 GFLOPS | Progress: (8/20) | 9.27 s
    [Task  1/25]  Current/Best:   11.56/  22.81 GFLOPS | Progress: (12/20) | 11.70 s
    [Task  1/25]  Current/Best:   16.77/  22.84 GFLOPS | Progress: (16/20) | 13.38 s
    [Task  1/25]  Current/Best:   11.63/  23.92 GFLOPS | Progress: (20/20) | 15.12 s Done.
-
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.29/  13.08 GFLOPS | Progress: (4/20) | 3.63 s
    [Task  2/25]  Current/Best:   13.98/  18.46 GFLOPS | Progress: (8/20) | 4.94 s
    [Task  2/25]  Current/Best:   21.24/  21.24 GFLOPS | Progress: (12/20) | 6.29 s
    [Task  2/25]  Current/Best:   12.05/  21.24 GFLOPS | Progress: (16/20) | 7.54 s
    [Task  2/25]  Current/Best:   18.83/  21.24 GFLOPS | Progress: (20/20) | 9.10 s Done.
-
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.58 GFLOPS | Progress: (4/20) | 5.85 s
    [Task  3/25]  Current/Best:   15.58/  16.85 GFLOPS | Progress: (8/20) | 7.77 s
    [Task  3/25]  Current/Best:   14.91/  16.85 GFLOPS | Progress: (12/20) | 9.49 s
    [Task  3/25]  Current/Best:    7.17/  23.72 GFLOPS | Progress: (16/20) | 11.40 s
    [Task  3/25]  Current/Best:   12.50/  23.72 GFLOPS | Progress: (20/20) | 15.91 s Done.
-
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.56/  20.49 GFLOPS | Progress: (4/20) | 2.38 s
    [Task  4/25]  Current/Best:    6.85/  20.49 GFLOPS | Progress: (8/20) | 6.68 s
    [Task  4/25]  Current/Best:   22.01/  22.01 GFLOPS | Progress: (12/20) | 11.22 s
    [Task  4/25]  Current/Best:   16.50/  22.01 GFLOPS | Progress: (16/20) | 13.45 s
    [Task  4/25]  Current/Best:   13.47/  22.01 GFLOPS | Progress: (20/20) | 15.33 s Done.
-
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.47/  10.28 GFLOPS | Progress: (4/20) | 2.60 s
    [Task  5/25]  Current/Best:   11.69/  11.69 GFLOPS | Progress: (8/20) | 4.68 s
    [Task  5/25]  Current/Best:   11.56/  18.04 GFLOPS | Progress: (12/20) | 7.75 s
    [Task  5/25]  Current/Best:   11.77/  22.85 GFLOPS | Progress: (16/20) | 9.16 s
    [Task  5/25]  Current/Best:   12.12/  22.85 GFLOPS | Progress: (20/20) | 11.02 s Done.
-
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.25/  20.72 GFLOPS | Progress: (4/20) | 3.97 s
    [Task  6/25]  Current/Best:   18.91/  20.72 GFLOPS | Progress: (8/20) | 5.73 s
    [Task  6/25]  Current/Best:   13.27/  20.72 GFLOPS | Progress: (12/20) | 7.67 s
    [Task  6/25]  Current/Best:   20.04/  20.72 GFLOPS | Progress: (16/20) | 9.89 s
    [Task  6/25]  Current/Best:    3.68/  20.72 GFLOPS | Progress: (20/20) | 12.40 s Done.
-
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.20/  12.95 GFLOPS | Progress: (4/20) | 3.52 s
    [Task  7/25]  Current/Best:   20.31/  21.21 GFLOPS | Progress: (8/20) | 5.03 s
    [Task  7/25]  Current/Best:   15.95/  21.21 GFLOPS | Progress: (12/20) | 6.92 s
    [Task  7/25]  Current/Best:   12.22/  21.21 GFLOPS | Progress: (16/20) | 8.98 s
    [Task  7/25]  Current/Best:    6.32/  21.65 GFLOPS | Progress: (20/20) | 11.44 s Done.
-
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   10.25/  13.88 GFLOPS | Progress: (4/20) | 2.94 s
    [Task  8/25]  Current/Best:    9.32/  13.88 GFLOPS | Progress: (8/20) | 7.66 s
    [Task  8/25]  Current/Best:   12.70/  13.88 GFLOPS | Progress: (12/20) | 13.84 s
    [Task  8/25]  Current/Best:   18.67/  18.67 GFLOPS | Progress: (16/20) | 15.90 s
    [Task  8/25]  Current/Best:   19.27/  19.27 GFLOPS | Progress: (20/20) | 22.35 s Done.
-
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.38/  15.57 GFLOPS | Progress: (4/20) | 11.95 s
    [Task  9/25]  Current/Best:   23.52/  23.52 GFLOPS | Progress: (8/20) | 13.77 s
    [Task  9/25]  Current/Best:    8.23/  23.52 GFLOPS | Progress: (12/20) | 16.11 s
    [Task  9/25]  Current/Best:   17.90/  23.52 GFLOPS | Progress: (16/20) | 18.76 s
    [Task  9/25]  Current/Best:    9.24/  23.52 GFLOPS | Progress: (20/20) | 26.42 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.09/  18.09 GFLOPS | Progress: (4/20) | 2.56 s
    [Task 10/25]  Current/Best:   15.49/  18.09 GFLOPS | Progress: (8/20) | 4.12 s
    [Task 10/25]  Current/Best:   12.70/  18.85 GFLOPS | Progress: (12/20) | 5.65 s
    [Task 10/25]  Current/Best:   18.89/  20.45 GFLOPS | Progress: (16/20) | 6.75 s
    [Task 10/25]  Current/Best:    8.88/  20.45 GFLOPS | Progress: (20/20
 ) | 8.31 s Done.
-
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.29/  18.14 GFLOPS | Progress: (4/20) | 3.29 s
    [Task 11/25]  Current/Best:   16.80/  18.14 GFLOPS | Progress: (8/20) | 6.02 s
    [Task 11/25]  Current/Best:   18.12/  18.14 GFLOPS | Progress: (12/20) | 8.08 s
    [Task 11/25]  Current/Best:   13.31/  21.17 GFLOPS | Progress: (16/20) | 10.85 s
    [Task 11/25]  Current/Best:   19.46/  21.58 GFLOPS | Progress: (20/20) | 12.87 s Done.
-
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.80/  17.97 GFLOPS | Progress: (4/20) | 5.40 s
    [Task 12/25]  Current/Best:    5.17/  17.97 GFLOPS | Progress: (8/20) | 9.11 s
    [Task 12/25]  Current/Best:   18.84/  18.84 GFLOPS | Progress: (12/20) | 11.13 s
    [Task 12/25]  Current/Best:   15.41/  18.84 GFLOPS | Progress: (16/20) | 13.89 s
    [Task 12/25]  Current/Best:   15.19/  18.84 GFLOPS | Progress: (20/20) | 15.80 s Done.
-
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.84/  17.26 GFLOPS | Progress: (4/20) | 3.68 s
    [Task 13/25]  Current/Best:   15.39/  21.00 GFLOPS | Progress: (8/20) | 6.12 s
    [Task 13/25]  Current/Best:   19.33/  21.68 GFLOPS | Progress: (12/20) | 9.02 s
    [Task 13/25]  Current/Best:   12.24/  21.68 GFLOPS | Progress: (16/20) | 12.44 s
    [Task 13/25]  Current/Best:   18.90/  21.68 GFLOPS | Progress: (20/20) | 14.68 s Done.
-
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.72/  13.72 GFLOPS | Progress: (4/20) | 3.34 s
    [Task 14/25]  Current/Best:    6.03/  13.72 GFLOPS | Progress: (8/20) | 5.54 s
    [Task 14/25]  Current/Best:   21.15/  21.15 GFLOPS | Progress: (12/20) | 8.12 s
    [Task 14/25]  Current/Best:   18.08/  21.15 GFLOPS | Progress: (16/20) | 9.74 s Done.
-
    [Task 14/25]  Current/Best:   16.22/  21.15 GFLOPS | Progress: (20/20) | 11.54 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   16.16/  17.65 GFLOPS | Progress: (4/20) | 2.73 s
    [Task 15/25]  Current/Best:   14.45/  18.02 GFLOPS | Progress: (8/20) | 4.02 s
    [Task 15/25]  Current/Best:   10.36/  22.27 GFLOPS | Progress: (12/20) | 6.10 s
    [Task 15/25]  Current/Best:   20.37/  22.27 GFLOPS | Progress: (16/20) | 9.05 s
    [Task 15/25]  Current/Best:    9.67/  22.27 GFLOPS | Progress: (20/20) | 10.02 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.91/  20.91 GFLOPS | Progress: (4/20) | 2.94 s
    [Task 16/25]  Current/Best:    3.04/  20.91 GFLOPS | Progress: (8/20) | 4.55 s
    [Task 16/25]  Current/Best:   19.63/  20.91 GFLOPS | Progress: (12/20) | 5.78 s
    [Task 16/25]  Current/Best:   17.59/  20.91 GFLOPS | Progress: (16/20) |
  7.11 s
    [Task 16/25]  Current/Best:    9.97/  22.27 GFLOPS | Progress: (20/20) | 9.15 s Done.
-
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   13.20/  18.90 GFLOPS | Progress: (4/20) | 4.72 s
    [Task 17/25]  Current/Best:   12.96/  23.03 GFLOPS | Progress: (8/20) | 7.51 s
    [Task 17/25]  Current/Best:   16.75/  23.03 GFLOPS | Progress: (12/20) | 9.58 s
    [Task 17/25]  Current/Best:   16.47/  23.03 GFLOPS | Progress: (16/20) | 11.75 s
    [Task 17/25]  Current/Best:   10.02/  23.03 GFLOPS | Progress: (20/20) | 13.87 s Done.
-
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.25/  18.01 GFLOPS | Progress: (4/20) | 3.75 s
    [Task 18/25]  Current/Best:   10.60/  18.46 GFLOPS | Progress: (8/20) | 7.17 s
    [Task 18/25]  Current/Best:   19.03/  19.03 GFLOPS | Progress: (12/20) | 9.09 s
    [Task 18/25]  Current/Best:    9.99/  19.03 GFLOPS | Progress: (16/20) | 12.71 s
    [Task 18/25]  Current/Best:   20.45/  20.45 GFLOPS | Progress: (20/20) | 14.22 s Done.
-
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    7.01/  20.20 GFLOPS | Progress: (4/20) | 6.07 s
    [Task 19/25]  Current/Best:    2.60/  20.20 GFLOPS | Progress: (8/20) | 9.31 s
    [Task 19/25]  Current/Best:   19.20/  21.11 GFLOPS | Progress: (12/20) | 12.12 s
    [Task 19/25]  Current/Best:   15.17/  21.11 GFLOPS | Progress: (16/20) | 14.95 s
    [Task 19/25]  Current/Best:    2.69/  23.20 GFLOPS | Progress: (20/20) | 17.75 s Done.
-
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    9.33/  14.94 GFLOPS | Progress: (4/20) | 3.37 s Done.
+
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.43/  17.43 GFLOPS | Progress: (4/20) | 6.41 s
    [Task  1/25]  Current/Best:    6.16/  17.43 GFLOPS | Progress: (8/20) | 9.47 s
    [Task  1/25]  Current/Best:   11.50/  22.71 GFLOPS | Progress: (12/20) | 11.91 s
    [Task  1/25]  Current/Best:   16.69/  22.71 GFLOPS | Progress: (16/20) | 13.62 s
    [Task  1/25]  Current/Best:   11.60/  23.88 GFLOPS | Progress: (20/20) | 15.36 s Done.
+
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.19/  13.03 GFLOPS | Progress: (4/20) | 3.87 s
    [Task  2/25]  Current/Best:   14.12/  17.59 GFLOPS | Progress: (8/20) | 5.18 s
    [Task  2/25]  Current/Best:   21.17/  21.17 GFLOPS | Progress: (12/20) | 6.54 s
    [Task  2/25]  Current/Best:   11.67/  21.17 GFLOPS | Progress: (16/20) | 7.83 s
    [Task  2/25]  Current/Best:   19.66/  21.17 GFLOPS | Progress: (20/20) | 9.42 s Done.
+
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.51 GFLOPS | Progress: (4/20) | 5.92 s
    [Task  3/25]  Current/Best:   15.36/  16.74 GFLOPS | Progress: (8/20) | 7.87 s
    [Task  3/25]  Current/Best:   14.86/  16.74 GFLOPS | Progress: (12/20) | 9.62 s
    [Task  3/25]  Current/Best:    7.20/  23.74 GFLOPS | Progress: (16/20) | 11.53 s
    [Task  3/25]  Current/Best:   12.57/  23.74 GFLOPS | Progress: (20/20) | 16.07 s Done.
+
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.34/  20.04 GFLOPS | Progress: (4/20) | 2.44 s
    [Task  4/25]  Current/Best:    6.66/  20.04 GFLOPS | Progress: (8/20) | 6.82 s
    [Task  4/25]  Current/Best:   21.72/  21.72 GFLOPS | Progress: (12/20) | 11.42 s
    [Task  4/25]  Current/Best:   17.33/  21.72 GFLOPS | Progress: (16/20) | 13.69 s
    [Task  4/25]  Current/Best:   13.15/  21.72 GFLOPS | Progress: (20/20) | 15.67 s Done.
+
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.56/  10.22 GFLOPS | Progress: (4/20) | 2.65 s
    [Task  5/25]  Current/Best:   11.64/  12.85 GFLOPS | Progress: (8/20) | 4.71 s
    [Task  5/25]  Current/Best:   10.03/  18.04 GFLOPS | Progress: (12/20) | 7.85 s
    [Task  5/25]  Current/Best:   11.55/  22.48 GFLOPS | Progress: (16/20) | 9.28 s
    [Task  5/25]  Current/Best:   11.82/  22.48 GFLOPS | Progress: (20/20) | 11.17 s Done.
+
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.26/  20.76 GFLOPS | Progress: (4/20) | 4.05 s
    [Task  6/25]  Current/Best:   18.98/  20.76 GFLOPS | Progress: (8/20) | 5.83 s
    [Task  6/25]  Current/Best:   13.25/  20.76 GFLOPS | Progress: (12/20) | 7.77 s
    [Task  6/25]  Current/Best:   19.91/  20.76 GFLOPS | Progress: (16/20) | 10.06 s
    [Task  6/25]  Current/Best:    3.73/  20.76 GFLOPS | Progress: (20/20) | 12.59 s Done.
+
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.14/  12.74 GFLOPS | Progress: (4/20) | 3.70 s
    [Task  7/25]  Current/Best:   20.22/  20.95 GFLOPS | Progress: (8/20) | 5.23 s
    [Task  7/25]  Current/Best:   12.62/  20.95 GFLOPS | Progress: (12/20) | 7.21 s
    [Task  7/25]  Current/Best:   12.24/  20.95 GFLOPS | Progress: (16/20) | 9.28 s
    [Task  7/25]  Current/Best:    6.37/  21.63 GFLOPS | Progress: (20/20) | 11.76 s Done.
+
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   10.42/  14.55 GFLOPS | Progress: (4/20) | 2.92 s
    [Task  8/25]  Current/Best:   10.11/  14.55 GFLOPS | Progress: (8/20) | 7.67 s
    [Task  8/25]  Current/Best:   13.23/  14.55 GFLOPS | Progress: (12/20) | 13.90 s
    [Task  8/25]  Current/Best:   18.96/  18.96 GFLOPS | Progress: (16/20) | 16.01 s
    [Task  8/25]  Current/Best:   19.82/  19.82 GFLOPS | Progress: (20/20) | 22.54 s Done.
+
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.25/  14.25 GFLOPS | Progress: (4/20) | 12.03 s
    [Task  9/25]  Current/Best:   23.38/  23.38 GFLOPS | Progress: (8/20) | 13.87 s
    [Task  9/25]  Current/Best:    8.25/  23.38 GFLOPS | Progress: (12/20) | 16.25 s
    [Task  9/25]  Current/Best:   17.40/  23.38 GFLOPS | Progress: (16/20) | 18.94 s
    [Task  9/25]  Current/Best:    9.11/  23.38 GFLOPS | Progress: (20/20) | 26.79 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.33/  18.33 GFLOPS | Progress: (4/20) | 2.61 s
    [Task 10/25]  Current/Best:   15.42/  18.33 GFLOPS | Progress: (8/20) | 4.21 s
    [Task 10/25]  Current/Best:   12.67/  19.07 GFLOPS | Progress: (12/20) | 5.75 s
    [Task 10/25]  Current/Best:   19.18/  19.90 GFLOPS | Progress: (16/20) | 6.87 s
    [Task 10/25]  Current/Best:    8.84/  19.90 GFLOPS | Progress: (20/20
 ) | 8.40 s Done.
+
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.05/  18.01 GFLOPS | Progress: (4/20) | 3.40 s
    [Task 11/25]  Current/Best:   16.85/  18.01 GFLOPS | Progress: (8/20) | 6.16 s
    [Task 11/25]  Current/Best:   17.16/  18.01 GFLOPS | Progress: (12/20) | 8.25 s
    [Task 11/25]  Current/Best:   13.30/  21.16 GFLOPS | Progress: (16/20) | 11.06 s
    [Task 11/25]  Current/Best:   19.40/  21.43 GFLOPS | Progress: (20/20) | 13.09 s Done.
+
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.78/  17.91 GFLOPS | Progress: (4/20) | 5.37 s
    [Task 12/25]  Current/Best:    5.32/  17.91 GFLOPS | Progress: (8/20) | 9.08 s
    [Task 12/25]  Current/Best:   19.09/  19.09 GFLOPS | Progress: (12/20) | 11.08 s
    [Task 12/25]  Current/Best:   15.07/  19.09 GFLOPS | Progress: (16/20) | 13.85 s
    [Task 12/25]  Current/Best:   15.17/  19.09 GFLOPS | Progress: (20/20) | 15.76 s Done.
+
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.88/  17.31 GFLOPS | Progress: (4/20) | 3.76 s
    [Task 13/25]  Current/Best:   15.42/  20.75 GFLOPS | Progress: (8/20) | 6.25 s
    [Task 13/25]  Current/Best:   19.42/  21.48 GFLOPS | Progress: (12/20) | 9.13 s
    [Task 13/25]  Current/Best:   12.21/  21.48 GFLOPS | Progress: (16/20) | 12.51 s
    [Task 13/25]  Current/Best:   18.49/  21.48 GFLOPS | Progress: (20/20) | 14.74 s Done.
+
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.61/  13.61 GFLOPS | Progress: (4/20) | 3.41 s
    [Task 14/25]  Current/Best:    6.09/  13.61 GFLOPS | Progress: (8/20) | 5.62 s
    [Task 14/25]  Current/Best:   20.33/  20.33 GFLOPS | Progress: (12/20) | 8.17 s
    [Task 14/25]  Current/Best:   17.28/  20.33 GFLOPS | Progress: (16/20) | 9.81 s Done.
+
    [Task 14/25]  Current/Best:   17.22/  20.33 GFLOPS | Progress: (20/20) | 11.56 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   16.16/  17.59 GFLOPS | Progress: (4/20) | 2.78 s
    [Task 15/25]  Current/Best:   14.27/  17.98 GFLOPS | Progress: (8/20) | 4.13 s
    [Task 15/25]  Current/Best:   10.37/  21.94 GFLOPS | Progress: (12/20) | 6.32 s
    [Task 15/25]  Current/Best:   20.30/  21.94 GFLOPS | Progress: (16/20) | 9.83 s
    [Task 15/25]  Current/Best:    8.75/  21.94 GFLOPS | Progress: (20/20) | 10.87 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.27/  20.27 GFLOPS | Progress: (4/20) | 3.16 s
    [Task 16/25]  Current/Best:    3.02/  20.27 GFLOPS | Progress: (8/20) | 4.78 s
    [Task 16/25]  Current/Best:   19.74/  20.27 GFLOPS | Progress: (12/20) | 6.02 s
    [Task 16/25]  Current/Best:   17.97/  20.27 GFLOPS | Progress: (16/20) |
  7.38 s
    [Task 16/25]  Current/Best:    9.95/  22.31 GFLOPS | Progress: (20/20) | 9.43 s Done.
+
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   13.58/  18.75 GFLOPS | Progress: (4/20) | 4.74 s
    [Task 17/25]  Current/Best:   14.49/  23.07 GFLOPS | Progress: (8/20) | 7.64 s
    [Task 17/25]  Current/Best:   16.80/  23.07 GFLOPS | Progress: (12/20) | 9.70 s
    [Task 17/25]  Current/Best:   16.78/  23.07 GFLOPS | Progress: (16/20) | 11.84 s
    [Task 17/25]  Current/Best:   10.02/  23.07 GFLOPS | Progress: (20/20) | 13.98 s Done.
+
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.38/  16.79 GFLOPS | Progress: (4/20) | 3.74 s
    [Task 18/25]  Current/Best:   10.59/  19.79 GFLOPS | Progress: (8/20) | 7.25 s
    [Task 18/25]  Current/Best:   19.15/  19.79 GFLOPS | Progress: (12/20) | 9.19 s
    [Task 18/25]  Current/Best:    9.90/  19.79 GFLOPS | Progress: (16/20) | 12.79 s
    [Task 18/25]  Current/Best:   20.59/  20.59 GFLOPS | Progress: (20/20) | 14.33 s Done.
+
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    6.88/  20.26 GFLOPS | Progress: (4/20) | 6.13 s
    [Task 19/25]  Current/Best:    2.60/  20.26 GFLOPS | Progress: (8/20) | 9.40 s
    [Task 19/25]  Current/Best:   19.38/  20.98 GFLOPS | Progress: (12/20) | 12.20 s
    [Task 19/25]  Current/Best:   15.40/  21.34 GFLOPS | Progress: (16/20) | 15.01 s
    [Task 19/25]  Current/Best:    2.74/  23.48 GFLOPS | Progress: (20/20) | 17.75 s Done.
+
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    8.80/  15.19 GFLOPS | Progress: (4/20) | 3.33 s Done.
      Done.
-
    [Task 20/25]  Current/Best:    9.89/  14.94 GFLOPS | Progress: (8/20) | 6.81 s
    [Task 20/25]  Current/Best:    2.31/  16.53 GFLOPS | Progress: (12/20) | 10.68 s
    [Task 20/25]  Current/Best:   11.04/  16.53 GFLOPS | Progress: (16/20) | 14.50 s
    [Task 20/25]  Current/Best:   13.11/  21.92 GFLOPS | Progress: (20/20) | 16.60 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.39/  17.29 GFLOPS | Progress: (4/20) | 3.26 s
    [Task 21/25]  Current/Best:   14.62/  17.29 GFLOPS | Progress: (8/20) | 4.86 s
    [Task 21/25]  Current/Best:    1.61/  17.29 GFLOPS | Progress: (12/20) | 7.04 s
    [Task 21/25]  Current/Best:   18.28/  18.28 GFLOPS | Progress: (16/20) | 10.50 s
    [Task 21/25]  Current/Best:    4.42/  18.28 GFLOPS | Progress: (20/20) | 17.74 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.70/  17.04 GFLOPS | Progress: (4/20
 ) | 2.71 s
    [Task 22/25]  Current/Best:    9.20/  21.61 GFLOPS | Progress: (8/20) | 4.68 s
    [Task 22/25]  Current/Best:   19.78/  21.61 GFLOPS | Progress: (12/20) | 6.98 s
    [Task 22/25]  Current/Best:   15.15/  21.61 GFLOPS | Progress: (16/20) | 9.04 s
    [Task 22/25]  Current/Best:   14.31/  21.61 GFLOPS | Progress: (20/20) | 10.77 s Done.
-
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.28/  20.22 GFLOPS | Progress: (4/20) | 3.30 s
    [Task 23/25]  Current/Best:   15.57/  20.22 GFLOPS | Progress: (8/20) | 6.68 s
    [Task 23/25]  Current/Best:   20.82/  21.42 GFLOPS | Progress: (12/20) | 8.50 s
    [Task 23/25]  Current/Best:    6.37/  21.42 GFLOPS | Progress: (16/20) | 15.48 s
    [Task 23/25]  Current/Best:    7.66/  21.42 GFLOPS | Progress: (20/20) | 19.71 s Done.
-
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.50/   8.50 GFLOPS | Progress: (4/20) | 11.79 s
    [Task 24/25]  Current/Best:    1.98/   8.50 GFLOPS | Progress: (8/20) | 22.81 s
    [Task 24/25]  Current/Best:    4.12/   8.50 GFLOPS | Progress: (12/20) | 34.37 s Done.
+
    [Task 20/25]  Current/Best:   10.76/  15.19 GFLOPS | Progress: (8/20) | 6.60 s
    [Task 20/25]  Current/Best:    2.35/  16.98 GFLOPS | Progress: (12/20) | 10.49 s
    [Task 20/25]  Current/Best:   12.60/  16.98 GFLOPS | Progress: (16/20) | 14.06 s
    [Task 20/25]  Current/Best:   13.61/  22.08 GFLOPS | Progress: (20/20) | 16.13 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.47/  19.06 GFLOPS | Progress: (4/20) | 3.24 s
    [Task 21/25]  Current/Best:   14.64/  19.06 GFLOPS | Progress: (8/20) | 4.78 s
    [Task 21/25]  Current/Best:    1.64/  19.06 GFLOPS | Progress: (12/20) | 6.94 s
    [Task 21/25]  Current/Best:   18.24/  19.06 GFLOPS | Progress: (16/20) | 10.38 s
    [Task 21/25]  Current/Best:    4.53/  19.06 GFLOPS | Progress: (20/20) | 17.50 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.74/  17.15 GFLOPS | Progress: (4/20
 ) | 2.72 s
    [Task 22/25]  Current/Best:    8.92/  22.15 GFLOPS | Progress: (8/20) | 4.61 s
    [Task 22/25]  Current/Best:   20.15/  22.15 GFLOPS | Progress: (12/20) | 6.89 s
    [Task 22/25]  Current/Best:   15.41/  22.15 GFLOPS | Progress: (16/20) | 8.94 s
    [Task 22/25]  Current/Best:   15.22/  22.15 GFLOPS | Progress: (20/20) | 10.65 s Done.
+
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.56/  20.62 GFLOPS | Progress: (4/20) | 3.27 s
    [Task 23/25]  Current/Best:   15.82/  21.42 GFLOPS | Progress: (8/20) | 6.59 s
    [Task 23/25]  Current/Best:   21.11/  21.65 GFLOPS | Progress: (12/20) | 8.43 s
    [Task 23/25]  Current/Best:    6.42/  21.65 GFLOPS | Progress: (16/20) | 15.53 s
    [Task 23/25]  Current/Best:    7.80/  21.65 GFLOPS | Progress: (20/20) | 19.71 s Done.
+
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.75/   8.75 GFLOPS | Progress: (4/20) | 11.84 s
    [Task 24/25]  Current/Best:    3.39/   8.75 GFLOPS | Progress: (8/20) | 23.10 s
    [Task 24/25]  Current/Best:    4.29/   8.75 GFLOPS | Progress: (12/20) | 33.83 s Done.
      Done.
-
    [Task 24/25]  Current/Best:    6.72/   8.77 GFLOPS | Progress: (16/20) | 39.87 s
    [Task 24/25]  Current/Best:    3.32/   8.83 GFLOPS | Progress: (20/20) | 45.76 s Done.
-
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.55/   2.87 GFLOPS | Progress: (4/20) | 11.63 s
    [Task 25/25]  Current/Best:    5.52/   7.69 GFLOPS | Progress: (8/20) | 22.94 s
    [Task 25/25]  Current/Best:    5.89/   7.69 GFLOPS | Progress: (12/20) | 34.23 s
    [Task 25/25]  Current/Best:    5.74/   8.92 GFLOPS | Progress: (16/20) | 36.04 s
    [Task 25/25]  Current/Best:    2.91/   8.92 GFLOPS | Progress: (20/20) | 46.74 s
+
    [Task 24/25]  Current/Best:    7.22/   9.01 GFLOPS | Progress: (16/20) | 39.36 s
    [Task 24/25]  Current/Best:    3.29/   9.05 GFLOPS | Progress: (20/20) | 45.35 s Done.
+
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.57/   2.95 GFLOPS | Progress: (4/20) | 11.64 s
    [Task 25/25]  Current/Best:    5.76/   7.94 GFLOPS | Progress: (8/20) | 22.93 s
    [Task 25/25]  Current/Best:    6.08/   7.94 GFLOPS | Progress: (12/20) | 34.42 s
    [Task 25/25]  Current/Best:    5.93/   9.77 GFLOPS | Progress: (16/20) | 36.16 s
    [Task 25/25]  Current/Best:    2.96/   9.77 GFLOPS | Progress: (20/20) | 46.84 s
 
 
 
@@ -748,8 +748,8 @@ improvement in comparing the optimized model to the unoptimized model.
 
  .. code-block:: none
 
-    optimized: {'mean': 414.13174090998837, 'median': 413.97930649998216, 'std': 0.5881865302862147}
-    unoptimized: {'mean': 495.4212594799992, 'median': 495.1658676500074, 'std': 0.8481904206234964}
+    optimized: {'mean': 402.06581897000433, 'median': 398.96202579998317, 'std': 6.337449570768014}
+    unoptimized: {'mean': 499.04213453998636, 'median': 498.90793589997884, 'std': 0.6439924833093513}
 
 
 
@@ -772,7 +772,7 @@ profiling/benchmarking.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 10 minutes  18.761 seconds)
+   **Total running time of the script:** ( 10 minutes  21.564 seconds)
 
 
 .. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index 0065159ab..f6a971e47 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -282,7 +282,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.329e-07 secs/op
+    1.347e-07 secs/op
 
 
 
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index a20d353e8..2d6c1e066 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -263,7 +263,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, placeholder(a, 0x1985bb10)), stage(b, placeholder(b, 0x10073620)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(mi [...]
+    [stage(a, placeholder(a, 0x201ec450)), stage(b, placeholder(b, 0x460cd40)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min [...]
 
 
 
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index f7c4a183b..cc8cd2b88 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**13:20.459** total execution time for **tutorial** files:
+**13:23.355** total execution time for **tutorial** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 10:18.761 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 10:21.564 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 01:03.827 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 01:05.379 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 01:01.790 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 00:59.942 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:30.744 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:30.600 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:23.969 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:24.139 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.703 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:00.823 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:00.504 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.706 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.154 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.194 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.005 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.004 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)                             | 00:00.001 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index c4845e9e1..230b54f8b 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -301,8 +301,8 @@ helper function to run a profile of the TVM generated code.
 
  .. code-block:: none
 
-    Numpy running time: 0.000008
-    naive: 0.000006
+    Numpy running time: 0.000007
+    naive: 0.000008
 
 
 
@@ -403,7 +403,7 @@ compile and run this new schedule with the parallel operation applied:
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    parallel: 0.000006
+    parallel: 0.000007
 
 
 
@@ -512,10 +512,10 @@ We can now compare the different schedules
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                   numpy    8.36492999951588e-06                     1.0
-                   naive              5.9759e-06      0.7143992837173598
-                parallel    6.032499999999999e-06     0.7211656284450831
-                  vector             2.45517e-05       2.935075368403672
+                   numpy    6.73916000778263e-06                     1.0
+                   naive               7.769e-06      1.1528142959995122
+                parallel              6.9891e-06      1.0370877070627096
+                  vector              2.4569e-05        3.64570658236736
 
 
 
@@ -936,7 +936,7 @@ matrix multiplication.
 
  .. code-block:: none
 
-    Numpy running time: 0.019065
+    Numpy running time: 0.019157
 
 
 
@@ -996,7 +996,7 @@ optimizations.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    none: 3.462468
+    none: 3.297034
 
 
 
@@ -1101,7 +1101,7 @@ schedule.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    blocking: 0.309103
+    blocking: 0.323046
 
 
 
@@ -1199,7 +1199,7 @@ already cache friendly from our previous optimizations.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    vectorization: 0.339872
+    vectorization: 0.346995
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1275,7 +1275,7 @@ more cache friendly.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    loop permutation: 0.118813
+    loop permutation: 0.121238
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1376,7 +1376,7 @@ optimized schedule.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    array packing: 0.110676
+    array packing: 0.111425
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1471,7 +1471,7 @@ to `C` when all the block results are ready.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    block caching: 0.111103
+    block caching: 0.111270
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1559,7 +1559,7 @@ of thread-level parallelization.
 
     /workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    parallelization: 0.145508
+    parallelization: 0.145444
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1640,13 +1640,13 @@ working, we can compare the results.
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                    none            3.4624677401                     1.0
-                blocking            0.3091033604     0.08927256038234532
-           vectorization            0.3398721474     0.09815893545052469
-        loop permutation            0.1188134207    0.034314665036148044
-           array packing     0.11067612730000001      0.0319645223024673
-           block caching     0.11110260050000001     0.03208769260527211
-         parallelization            0.1455083809      0.0420244726657865
+                    none      3.2970339182999995                     1.0
+                blocking            0.3230460005     0.09798079379983066
+           vectorization     0.34699545509999996     0.10524473320520647
+        loop permutation     0.12123827140000001    0.036771921188639846
+           array packing            0.1114250507     0.03379554273965505
+           block caching     0.11126963860000001     0.03374840579661744
+         parallelization            0.1454438823    0.044113553546635355
 
 
 
@@ -1686,11 +1686,6 @@ operations with tunable parameters that allows you to automatically optimize
 the computation for specific platforms.
 
 
-.. rst-class:: sphx-glr-timing
-
-   **Total running time of the script:** ( 1 minutes  1.790 seconds)
-
-
 .. _sphx_glr_download_tutorial_tensor_expr_get_started.py:
 
 .. only:: html
diff --git a/docs/commit_hash b/docs/commit_hash
index 69b603b4d..1f61e58cf 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-c54eea7d0a9890c25eae15df1a0c47b263b27a07
+9c7aaace4355c67403be563de3059d34fb8e29f5
diff --git a/docs/how_to/compile_models/from_darknet.html b/docs/how_to/compile_models/from_darknet.html
index 64d674a71..46caf765e 100644
--- a/docs/how_to/compile_models/from_darknet.html
+++ b/docs/how_to/compile_models/from_darknet.html
@@ -569,7 +569,7 @@ class:[&#39;truck 0.9266&#39;] left:471 top:83 right:689 bottom:169
 class:[&#39;bicycle 0.9984&#39;] left:111 top:113 right:577 bottom:447
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  0.890 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  4.791 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-darknet-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7716f96385bd5abb6e822041e285be54/from_darknet.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_darknet.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index aafea9d06..e83ae7636 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -422,7 +422,7 @@ to download the full example code</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;x&quot;</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">x</span><span class="o">.</span><span class="n">shape</span></a><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip3f57ba66-5bd1-48cd-a714-ceb3b1a31e15 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipf0c21fcb-c5be-400a-ada3-38098fbccd38 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
 x (1, 3, 224, 224)
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index 3aa64746e..dd96bc603 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -427,12 +427,13 @@ python3 -m pip install -f https://release.oneflow.info <span class="nv">oneflow<
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip&quot; to /workspace/.oneflow/flowvision_cache/resnet18.zip
 
   0%|          | 0.00/41.5M [00:00&lt;?, ?B/s]
- 19%|#9        | 7.99M/41.5M [00:00&lt;00:00, 70.5MB/s]
- 39%|###8      | 16.0M/41.5M [00:00&lt;00:00, 61.3MB/s]
- 60%|######    | 25.0M/41.5M [00:00&lt;00:00, 73.8MB/s]
- 78%|#######7  | 32.3M/41.5M [00:00&lt;00:00, 72.5MB/s]
- 96%|#########6| 40.0M/41.5M [00:00&lt;00:00, 73.9MB/s]
-100%|##########| 41.5M/41.5M [00:00&lt;00:00, 73.9MB/s]
+ 19%|#9        | 7.99M/41.5M [00:00&lt;00:00, 76.3MB/s]
+ 37%|###6      | 15.3M/41.5M [00:00&lt;00:00, 73.1MB/s]
+ 54%|#####3    | 22.2M/41.5M [00:00&lt;00:00, 73.0MB/s]
+ 70%|#######   | 29.2M/41.5M [00:00&lt;00:00, 49.1MB/s]
+ 83%|########3 | 34.6M/41.5M [00:00&lt;00:00, 44.1MB/s]
+ 96%|#########6| 40.0M/41.5M [00:00&lt;00:00, 42.8MB/s]
+100%|##########| 41.5M/41.5M [00:00&lt;00:00, 50.9MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_pytorch.html b/docs/how_to/compile_models/from_pytorch.html
index 246014018..134f2f4d4 100644
--- a/docs/how_to/compile_models/from_pytorch.html
+++ b/docs/how_to/compile_models/from_pytorch.html
@@ -409,9 +409,10 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/resnet18-f37072fd.pth&quot; to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
 
   0%|          | 0.00/44.7M [00:00&lt;?, ?B/s]
- 42%|####1     | 18.6M/44.7M [00:00&lt;00:00, 195MB/s]
- 85%|########4 | 37.9M/44.7M [00:00&lt;00:00, 200MB/s]
-100%|##########| 44.7M/44.7M [00:00&lt;00:00, 203MB/s]
+  9%|9         | 4.02M/44.7M [00:00&lt;00:01, 42.1MB/s]
+ 18%|#8        | 8.05M/44.7M [00:00&lt;00:00, 40.9MB/s]
+ 67%|######6   | 29.9M/44.7M [00:00&lt;00:00, 125MB/s]
+100%|##########| 44.7M/44.7M [00:00&lt;00:00, 116MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_tensorflow.html b/docs/how_to/compile_models/from_tensorflow.html
index 1f3266167..d6c958402 100644
--- a/docs/how_to/compile_models/from_tensorflow.html
+++ b/docs/how_to/compile_models/from_tensorflow.html
@@ -631,7 +631,7 @@ banana (score = 0.00022)
 desk (score = 0.00019)
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  5.479 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  7.617 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-tensorflow-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7f1d3d1b878694c201c614c807cdebc8/from_tensorflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_tensorflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/sg_execution_times.html b/docs/how_to/compile_models/sg_execution_times.html
index 84422315e..1ac3b9c92 100644
--- a/docs/how_to/compile_models/sg_execution_times.html
+++ b/docs/how_to/compile_models/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-compile-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:01.573</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
+<p><strong>05:14.531</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 81%" />
@@ -331,43 +331,43 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></td>
-<td><p>01:05.479</p></td>
+<td><p>01:07.617</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></td>
-<td><p>01:00.890</p></td>
+<td><p>01:04.791</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></td>
-<td><p>00:39.009</p></td>
+<td><p>00:40.806</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></td>
-<td><p>00:26.922</p></td>
+<td><p>00:29.005</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></td>
-<td><p>00:24.571</p></td>
+<td><p>00:26.079</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></td>
-<td><p>00:24.560</p></td>
+<td><p>00:24.383</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></td>
-<td><p>00:23.300</p></td>
+<td><p>00:23.995</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></td>
-<td><p>00:19.894</p></td>
+<td><p>00:20.074</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></td>
-<td><p>00:14.556</p></td>
+<td><p>00:15.271</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></td>
-<td><p>00:02.392</p></td>
+<td><p>00:02.510</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/deploy_models/deploy_model_on_android.html b/docs/how_to/deploy_models/deploy_model_on_android.html
index a215fb406..e62ddfe07 100644
--- a/docs/how_to/deploy_models/deploy_model_on_android.html
+++ b/docs/how_to/deploy_models/deploy_model_on_android.html
@@ -648,7 +648,7 @@ to the remote android device.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  16.3400      16.3749      16.9010      15.8284       0.4512
+  16.6725      16.6659      17.2928      16.0786       0.4463
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
index c59892462..35b6de4bb 100644
--- a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
+++ b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
@@ -431,13 +431,15 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth&quot; to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
 
   0%|          | 0.00/170M [00:00&lt;?, ?B/s]
- 12%|#2        | 21.2M/170M [00:00&lt;00:00, 222MB/s]
- 28%|##8       | 47.9M/170M [00:00&lt;00:00, 256MB/s]
- 44%|####4     | 75.2M/170M [00:00&lt;00:00, 270MB/s]
- 60%|#####9    | 101M/170M [00:00&lt;00:00, 271MB/s]
- 75%|#######5  | 128M/170M [00:00&lt;00:00, 273MB/s]
- 91%|######### | 154M/170M [00:00&lt;00:00, 275MB/s]
-100%|##########| 170M/170M [00:00&lt;00:00, 268MB/s]
+  7%|6         | 11.7M/170M [00:00&lt;00:01, 115MB/s]
+ 16%|#6        | 27.3M/170M [00:00&lt;00:01, 142MB/s]
+ 27%|##7       | 46.5M/170M [00:00&lt;00:00, 169MB/s]
+ 40%|###9      | 67.2M/170M [00:00&lt;00:00, 188MB/s]
+ 53%|#####3    | 90.7M/170M [00:00&lt;00:00, 209MB/s]
+ 66%|######6   | 112M/170M [00:00&lt;00:00, 214MB/s]
+ 80%|########  | 136M/170M [00:00&lt;00:00, 226MB/s]
+ 94%|#########4| 160M/170M [00:00&lt;00:00, 234MB/s]
+100%|##########| 170M/170M [00:00&lt;00:00, 208MB/s]
 /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
   for i in range(dim)
 /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the &#39;trunc&#39; function NOT &#39;floor&#39;). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode=&#39;trunc&#39;), or for actual floor division, use torch.div(a, b, rounding_mode=&#39;floor&#39;).
@@ -532,7 +534,7 @@ torchvision rcnn models.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get 9 valid boxes
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  56.292 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  3.849 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-object-detection-pytorch-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7795da4b258c8feff986668b95ef57ad/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized.html b/docs/how_to/deploy_models/deploy_prequantized.html
index fe2bd7685..0c1060e23 100644
--- a/docs/how_to/deploy_models/deploy_prequantized.html
+++ b/docs/how_to/deploy_models/deploy_prequantized.html
@@ -475,7 +475,7 @@ training. Other models require a full post training calibration.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/mobilenet_v2-b0353104.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
 
   0%|          | 0.00/13.6M [00:00&lt;?, ?B/s]
-100%|##########| 13.6M/13.6M [00:00&lt;00:00, 165MB/s]
+100%|##########| 13.6M/13.6M [00:00&lt;00:00, 152MB/s]
 </pre></div>
 </div>
 </div>
@@ -564,7 +564,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  90.3487      90.2415      96.5346      90.1097       0.6424
+  90.3458      90.2961      90.8836      90.1769       0.1406
 </pre></div>
 </div>
 <div class="admonition note">
@@ -603,7 +603,7 @@ This includes support for the VNNI 8 bit dot product instruction (CascadeLake or
 <div class="section" id="deploy-a-quantized-tflite-model">
 <h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline">¶</a></h2>
 <p>TODO</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  8.511 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  9.946 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized_tflite.html b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
index d5dd22ca4..738a9e9f6 100644
--- a/docs/how_to/deploy_models/deploy_prequantized_tflite.html
+++ b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
@@ -568,7 +568,7 @@ TFLite Top-5 labels: [387 102 386 341 349]
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  119.6078     119.5939     120.5944     118.7345      0.3383
+  120.4244     120.3765     124.4274     119.7161      0.5119
 </pre></div>
 </div>
 <div class="admonition note">
@@ -596,7 +596,7 @@ network for ARM CPU</span></a>.</p></li>
 </ul>
 </div></blockquote>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  51.782 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  52.525 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-tflite-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/56691c7a27d45da61d112276334640d3/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_quantized.html b/docs/how_to/deploy_models/deploy_quantized.html
index 1f54d31dd..bfee4c7e4 100644
--- a/docs/how_to/deploy_models/deploy_quantized.html
+++ b/docs/how_to/deploy_models/deploy_quantized.html
@@ -504,7 +504,7 @@ for calibration. But the accuracy might be impacted.</p>
   DeprecationWarning,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  18.719 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  26.287 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
index 7583a7ac1..1b07e46ed 100644
--- a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
+++ b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
@@ -436,23 +436,23 @@ to your device.</p>
 Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
 
   0%|          | 0/132723 [00:00&lt;?, ?KB/s]
-  5%|4         | 6141/132723 [00:00&lt;00:02, 61398.49KB/s]
- 11%|#         | 13975/132723 [00:00&lt;00:01, 71360.35KB/s]
- 17%|#6        | 21946/132723 [00:00&lt;00:01, 75163.93KB/s]
- 23%|##2       | 29974/132723 [00:00&lt;00:01, 77179.65KB/s]
- 29%|##8       | 37933/132723 [00:00&lt;00:01, 78037.58KB/s]
- 35%|###4      | 45998/132723 [00:00&lt;00:01, 78924.47KB/s]
- 41%|####      | 53962/132723 [00:00&lt;00:00, 79156.23KB/s]
- 47%|####6     | 61982/132723 [00:00&lt;00:00, 79485.31KB/s]
- 53%|#####2    | 69931/132723 [00:00&lt;00:00, 79383.92KB/s]
- 59%|#####8    | 77945/132723 [00:01&lt;00:00, 79612.51KB/s]
- 65%|######4   | 85939/132723 [00:01&lt;00:00, 79711.47KB/s]
- 71%|#######   | 93931/132723 [00:01&lt;00:00, 79771.54KB/s]
- 77%|#######6  | 101909/132723 [00:01&lt;00:00, 78995.62KB/s]
- 83%|########2 | 109908/132723 [00:01&lt;00:00, 79290.01KB/s]
- 89%|########8 | 117839/132723 [00:01&lt;00:00, 78900.02KB/s]
- 95%|#########4| 125776/132723 [00:01&lt;00:00, 79038.21KB/s]
-100%|##########| 132723/132723 [00:01&lt;00:00, 78401.34KB/s]
+  5%|4         | 6448/132723 [00:00&lt;00:01, 64472.99KB/s]
+ 11%|#1        | 15048/132723 [00:00&lt;00:01, 77132.28KB/s]
+ 17%|#7        | 22762/132723 [00:00&lt;00:01, 70546.96KB/s]
+ 24%|##3       | 31405/132723 [00:00&lt;00:01, 76478.66KB/s]
+ 30%|###       | 40055/132723 [00:00&lt;00:01, 79963.79KB/s]
+ 36%|###6      | 48218/132723 [00:00&lt;00:01, 80514.42KB/s]
+ 42%|####2     | 56309/132723 [00:00&lt;00:01, 73937.70KB/s]
+ 49%|####8     | 64942/132723 [00:00&lt;00:00, 77609.39KB/s]
+ 55%|#####4    | 72809/132723 [00:00&lt;00:00, 76322.25KB/s]
+ 61%|######1   | 81421/132723 [00:01&lt;00:00, 79197.94KB/s]
+ 67%|######7   | 89406/132723 [00:01&lt;00:00, 78898.33KB/s]
+ 74%|#######3  | 98029/132723 [00:01&lt;00:00, 81063.33KB/s]
+ 80%|########  | 106720/132723 [00:01&lt;00:00, 82796.24KB/s]
+ 87%|########6 | 115029/132723 [00:01&lt;00:00, 80953.18KB/s]
+ 93%|#########2| 123421/132723 [00:01&lt;00:00, 81821.98KB/s]
+100%|#########9| 132186/132723 [00:01&lt;00:00, 83542.16KB/s]
+100%|##########| 132723/132723 [00:01&lt;00:00, 79265.78KB/s]
 </pre></div>
 </div>
 <p>Create TVM runtime and do inference
@@ -495,7 +495,7 @@ Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from h
 <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  31.696 seconds)</p>
+<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  36.102 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/sg_execution_times.html b/docs/how_to/deploy_models/sg_execution_times.html
index c9d02928f..072098eb2 100644
--- a/docs/how_to/deploy_models/sg_execution_times.html
+++ b/docs/how_to/deploy_models/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-deploy-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>10:38.537</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
+<p><strong>11:02.182</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 86%" />
@@ -331,31 +331,31 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></td>
-<td><p>02:56.292</p></td>
+<td><p>03:03.849</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></td>
-<td><p>02:31.696</p></td>
+<td><p>02:36.102</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></td>
-<td><p>01:51.782</p></td>
+<td><p>01:52.525</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></td>
-<td><p>01:18.719</p></td>
+<td><p>01:26.287</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></td>
-<td><p>01:08.511</p></td>
+<td><p>01:09.946</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></td>
-<td><p>00:29.362</p></td>
+<td><p>00:30.712</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></td>
-<td><p>00:22.169</p></td>
+<td><p>00:22.757</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></td>
diff --git a/docs/how_to/extend_tvm/bring_your_own_datatypes.html b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
index 56c6e276a..6214524e0 100644
--- a/docs/how_to/extend_tvm/bring_your_own_datatypes.html
+++ b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
@@ -607,7 +607,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 <span class="n">module</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <span class="n">get_mobilenet</span><span class="p">()</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.ziped449611-ba6b-4262-abcd-0f67dd37956f from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip37305150-8aaa-4c94-8db0-c7a97f22fa14 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 </pre></div>
 </div>
 <p>It’s easy to execute MobileNet with native TVM:</p>
@@ -671,7 +671,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/workspace/python/tvm/driver/build_module.py:268: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
   &quot;target_host parameter is going to be deprecated. &quot;
-  Check failed: (lower) is false: Intrinsic lowering function for target llvm, intrinsic name tir.sqrt, type 150 not found
+  Check failed: (lower) is false: FloatImm lowering function for target llvm type 150 not found
 </pre></div>
 </div>
 <p>When we attempt to run the model, we get a familiar error telling us that more functions need to be registered for myfloat.</p>
diff --git a/docs/how_to/extend_tvm/sg_execution_times.html b/docs/how_to/extend_tvm/sg_execution_times.html
index e5fb5747d..5e4d40988 100644
--- a/docs/how_to/extend_tvm/sg_execution_times.html
+++ b/docs/how_to/extend_tvm/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-extend-tvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:40.233</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
+<p><strong>00:41.921</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -331,19 +331,19 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></td>
-<td><p>00:37.095</p></td>
+<td><p>00:38.654</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></td>
-<td><p>00:02.207</p></td>
+<td><p>00:02.295</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></td>
-<td><p>00:00.924</p></td>
+<td><p>00:00.965</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></td>
-<td><p>00:00.007</p></td>
+<td><p>00:00.008</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/extend_tvm/use_pass_instrument.html b/docs/how_to/extend_tvm/use_pass_instrument.html
index e39863252..3f884ce31 100644
--- a/docs/how_to/extend_tvm/use_pass_instrument.html
+++ b/docs/how_to/extend_tvm/use_pass_instrument.html
@@ -507,10 +507,10 @@ profile the execution time of each passes.</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6612us [6612us] (45.43%; 45.43%)
-FoldScaleAxis: 7941us [6us] (54.57%; 54.57%)
-        FoldConstant: 7935us [1596us] (54.53%; 99.93%)
-                InferType: 6339us [6339us] (43.56%; 79.89%)
+InferType: 6823us [6823us] (45.33%; 45.33%)
+FoldScaleAxis: 8228us [7us] (54.67%; 54.67%)
+        FoldConstant: 8222us [1659us] (54.62%; 99.92%)
+                InferType: 6562us [6562us] (43.60%; 79.82%)
 </pre></div>
 </div>
 </div>
@@ -532,10 +532,10 @@ Refer to following sections and <a class="reference internal" href="../../refere
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6191us [6191us] (44.33%; 44.33%)
-FoldScaleAxis: 7775us [4us] (55.67%; 55.67%)
-        FoldConstant: 7770us [1629us] (55.64%; 99.94%)
-                InferType: 6141us [6141us] (43.97%; 79.03%)
+InferType: 6569us [6569us] (44.87%; 44.87%)
+FoldScaleAxis: 8071us [6us] (55.13%; 55.13%)
+        FoldConstant: 8065us [1679us] (55.09%; 99.93%)
+                InferType: 6387us [6387us] (43.63%; 79.19%)
 </pre></div>
 </div>
 <p>Register empty list to clear existing instruments.</p>
diff --git a/docs/how_to/optimize_operators/opt_conv_cuda.html b/docs/how_to/optimize_operators/opt_conv_cuda.html
index cb3b0f01f..9d8c2bade 100644
--- a/docs/how_to/optimize_operators/opt_conv_cuda.html
+++ b/docs/how_to/optimize_operators/opt_conv_cuda.html
@@ -559,7 +559,7 @@ latency of convolution.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Convolution: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">*</span> <span cl [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 54.167493 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 54.117075 ms
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-optimize-operators-opt-conv-cuda-py">
diff --git a/docs/how_to/optimize_operators/opt_conv_tensorcore.html b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
index 644adccd9..2fa414e5e 100644
--- a/docs/how_to/optimize_operators/opt_conv_tensorcore.html
+++ b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
@@ -901,7 +901,7 @@ be able to run on our build server</p>
     <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;conv2d with tensor core: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">* [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 6.615088 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 13.361453 ms
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/optimize_operators/opt_gemm.html b/docs/how_to/optimize_operators/opt_gemm.html
index 7a922f08f..110866b43 100644
--- a/docs/how_to/optimize_operators/opt_gemm.html
+++ b/docs/how_to/optimize_operators/opt_gemm.html
@@ -456,8 +456,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Baseline: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.018717
-Baseline: 3.340915
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.017869
+Baseline: 3.290199
 </pre></div>
 </div>
 <p>In TVM, we can always inspect lower level IR to debug or optimize our schedule.
@@ -517,7 +517,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt1: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.294002
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.309832
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -584,7 +584,7 @@ vastly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt2: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.333302
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.346444
 </pre></div>
 </div>
 <p>Here is the generated IR after vectorization.</p>
@@ -645,7 +645,7 @@ the access pattern for A matrix is more cache friendly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt3: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.118043
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.118942
 </pre></div>
 </div>
 <p>Here is the generated IR after loop permutation.</p>
@@ -728,7 +728,7 @@ flattening.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt4: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.110974
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.109270
 </pre></div>
 </div>
 <p>Here is the generated IR after array packing.</p>
@@ -814,7 +814,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt5: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.112042
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.103795
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -904,7 +904,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt6: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">opt6_time</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.145295
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.144459
 </pre></div>
 </div>
 <p>Here is the generated IR after parallelization.</p>
diff --git a/docs/how_to/optimize_operators/sg_execution_times.html b/docs/how_to/optimize_operators/sg_execution_times.html
index dec356d14..982b42d96 100644
--- a/docs/how_to/optimize_operators/sg_execution_times.html
+++ b/docs/how_to/optimize_operators/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-optimize-operators-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:34.252</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
+<p><strong>00:34.305</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,15 +331,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></td>
-<td><p>00:31.922</p></td>
+<td><p>00:31.852</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></td>
-<td><p>00:01.296</p></td>
+<td><p>00:01.379</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></td>
-<td><p>00:01.034</p></td>
+<td><p>00:01.073</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
index 71b35ea73..4c75e171d 100644
--- a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
+++ b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autoscheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:58.178</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
+<p><strong>06:01.892</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 85%" />
@@ -331,27 +331,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></td>
-<td><p>03:11.755</p></td>
+<td><p>03:14.483</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></td>
-<td><p>01:22.220</p></td>
+<td><p>01:23.600</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></td>
-<td><p>00:45.922</p></td>
+<td><p>00:46.497</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></td>
-<td><p>00:20.657</p></td>
+<td><p>00:19.399</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></td>
-<td><p>00:08.842</p></td>
+<td><p>00:09.054</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></td>
-<td><p>00:08.782</p></td>
+<td><p>00:08.859</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
index b0723dedc..5a217dcc9 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
@@ -486,483 +486,105 @@ cooperative fetching, unrolling and operator fusion.</p>
              compute: Buffer(compute_2: Pointer(float32), float32, [25088], [])}
   buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
   preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
-  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 28;
-  allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
-  allocate(pad_temp.shared: Pointer(shared float32), float32, [72]), storage_scope = shared;
-  allocate(kernel.shared: Pointer(shared float32), float32, [3072]), storage_scope = shared;
-  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64 {
-    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope=&quot;local&quot;, align=32)[0] = 0f32
+  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+  allocate(conv2d_nchw: Pointer(local float32), float32, [7]), storage_scope = local;
+  allocate(pad_temp.shared: Pointer(shared float32), float32, [1008]), storage_scope = shared;
+  allocate(kernel.shared: Pointer(shared float32), float32, [384]), storage_scope = shared;
+  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56 {
+    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [7], [], scope=&quot;local&quot;, align=16)[0] = 0f32
     conv2d_nchw_1[1] = 0f32
     conv2d_nchw_1[2] = 0f32
     conv2d_nchw_1[3] = 0f32
     conv2d_nchw_1[4] = 0f32
     conv2d_nchw_1[5] = 0f32
     conv2d_nchw_1[6] = 0f32
-    conv2d_nchw_1[7] = 0f32
-    conv2d_nchw_1[8] = 0f32
-    conv2d_nchw_1[9] = 0f32
-    conv2d_nchw_1[10] = 0f32
-    conv2d_nchw_1[11] = 0f32
-    conv2d_nchw_1[12] = 0f32
-    conv2d_nchw_1[13] = 0f32
-    for (rc.outer.outer: int32, 0, 64) {
+    for (rc.outer.outer: int32, 0, 32) {
       for (ry.outer.outer: int32, 0, 3) {
-        let cse_var_2: int32 = (rc.outer.outer*72)
+        let cse_var_4: int32 = (rc.outer.outer*784)
+        let cse_var_3: int32 = (ry.outer.outer*7)
+        let cse_var_2: int32 = (rc.outer.outer*144)
         let cse_var_1: int32 = (ry.outer.outer*3)
          {
-          attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64 {
-            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
-              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [72], [], scope=&quot;shared&quot;)[(threadIdx.x_1*4)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 9))) &amp;&amp; (floormod((threadIdx.x_1*4), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv((threadIdx.x_1*4), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) +  [...]
-            }
-            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
-              pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 1), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 1), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0 [...]
-            }
-            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
-              pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 2), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 2), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 2), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0 [...]
-            }
-            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
-              pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 3), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 3), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 3), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0 [...]
-            }
+          attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1: Buffer(pad_temp.shared, float32, [1008], [], scope=&quot;shared&quot;)[threadIdx.x_1] = @tir.if_then_else(((((1 &lt;= (floordiv(threadIdx.x_1, 9) + ry.outer.outer)) &amp;&amp; ((floordiv(threadIdx.x_1, 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(threadIdx.x_1, 9))) &amp;&amp; (floormod(threadIdx.x_1, 9) &lt; 8)), data[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 56)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 2), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 2), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 56), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 112)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 4), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 4), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 112), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 168)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 6), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 6), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 168), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 224)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 8), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 8), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 224), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 280)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 1), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 1), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 280), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 336)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 3), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 3), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 336), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 5), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 5), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 392), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 448)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 7), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 7), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 448), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 504)] = @tir.if_then_else(((((1 &lt;= (floordiv(threadIdx.x_1, 9) + ry.outer.outer)) &amp;&amp; ((floordiv(threadIdx.x_1, 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(threadIdx.x_1, 9))) &amp;&amp; (floormod(threadIdx.x_1, 9) &lt; 8)), data[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) + 384)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 560)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 56), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 2), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 2), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 560), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 2), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 616)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 49), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 4), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 4), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 616), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 4), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 672)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 42), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 6), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 6), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 672), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 6), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 728)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 8), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 8), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 728), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 784)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 28), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 1), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 1), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 784), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 1), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 840)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 21), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 3), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 3), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 840), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 3), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 896)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 14), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 5), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 5), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 896), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 5), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          pad_temp.shared_1[(threadIdx.x_1 + 952)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 7), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 7), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 952), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
+          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1: Buffer(kernel.shared, float32, [384], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[((((((blockIdx.x*36864) + (floordiv(threadIdx.x_2, 48)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 48), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 56)] = kernel[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 56), 48)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 48), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 112)] = kernel[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 112), 48)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 48), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 168)] = kernel[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 168), 48)*4608)) + cse_var_2) + (floormod((floordiv(threadIdx.x_2, 3) + 8), 16)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 224), 48)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 32), 48), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          kernel.shared_1[(threadIdx.x_2 + 280)] = kernel[((((((blockIdx.x*36864) + (floordiv((threadIdx.x_2 + 280), 48)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 40), 48), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 56;
+          if @tir.likely((threadIdx.x_2 &lt; 48), dtype=bool) {
+            kernel.shared_1[(threadIdx.x_2 + 336)] = kernel[((((((blockIdx.x*36864) + cse_var_2) + (floordiv(threadIdx.x_2, 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 32256)]
+          }
+          for (rc.outer.inner: int32, 0, 16) {
+            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*63) + floormod(threadIdx.x, 7))]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 9)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 10)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 11)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 18)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 19)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 20)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 27)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 28)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 29)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 36)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 37)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 38)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 45)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 46)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 47)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
+            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 54)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
+            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 55)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
+            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + floormod(threadIdx.x, 7)) + 56)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
           }
-          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1: Buffer(kernel.shared, float32, [3072], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 64)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 64), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 128)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 128), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 192)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 36864)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 256)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 256), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 320)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 320), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 384)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 73728)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 448), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 512)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 512), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 576)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 110592)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 640)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 640), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 704)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 704), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 768)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 147456)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 832)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 832), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 896), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 960)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 184320)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1024), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1088), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1152)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 221184)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1216)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1216), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1280)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1280), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1408)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1408), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1472)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1472), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1536)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 294912)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1600)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1600), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1664)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1664), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1728)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 331776)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1792)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1792), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1856)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1856), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1920)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 368640)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1984)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1984), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2048)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2048), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2112)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 405504)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2176)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2176), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2240)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2240), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2304)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 442368)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2368)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2368), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2432)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2432), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2496)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 479232)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2560)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2560), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2624)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2624), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2688)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 516096)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2752)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2752), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2816)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2816), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2880)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 552960)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2944)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2944), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 3008)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 3008), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 47)]))
         }
       }
     }
-    for (i1.inner: int32, 0, 2) {
-      for (i3.inner: int32, 0, 7) {
-        compute[(((((floordiv(blockIdx.x, 7)*6272) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((floordiv(blockIdx.x, 7)*128) + (threadIdx.x*2)) + i1.inner)]), 0f32)
-      }
+    for (i2.inner: int32, 0, 7) {
+      compute[((((blockIdx.x*392) + (floordiv(threadIdx.x, 7)*49)) + (i2.inner*7)) + floormod(threadIdx.x, 7))] = max((conv2d_nchw_1[i2.inner] + bias[((blockIdx.x*8) + floordiv(threadIdx.x, 7))]), 0f32)
     }
   }
 }
@@ -999,7 +621,7 @@ cooperative fetching, unrolling and operator fusion.</p>
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.358 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.287 ms
 </pre></div>
 </div>
 </div>
@@ -1029,35 +651,35 @@ conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o
 conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
 conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
-conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
+conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
+conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
 conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
 conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
-conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
+conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=7)
 conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
 conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
 conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
-conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
+conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
+conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
 conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
-conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
+conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=1)
+conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=16)
 conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
 conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
-conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
-conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
+conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=3)
+conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
 s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2d_nc [...]
 compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
 compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
 compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
-compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
+compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
 compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
-compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
+compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=7)
 compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
 compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
-compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
+compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
+compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
 compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
 s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
 s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -1077,14 +699,14 @@ s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
 s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
 pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=56)
 s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
-s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 512)
+s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 64)
 s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
 
 CUDA source code:
@@ -1102,10 +724,10 @@ CUDA source code:
   #define int64_t long long
   #define uint64_t unsigned long long
 #endif
-extern &quot;C&quot; __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-  float conv2d_nchw[14];
-  __shared__ float pad_temp_shared[72];
-  __shared__ float kernel_shared[3072];
+extern &quot;C&quot; __global__ void __launch_bounds__(56) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+  float conv2d_nchw[7];
+  __shared__ float pad_temp_shared[1008];
+  __shared__ float kernel_shared[384];
   conv2d_nchw[0] = 0.000000e+00f;
   conv2d_nchw[1] = 0.000000e+00f;
   conv2d_nchw[2] = 0.000000e+00f;
@@ -1113,419 +735,64 @@ extern &quot;C&quot; __global__ void __launch_bounds__(64) default_function_kern
   conv2d_nchw[4] = 0.000000e+00f;
   conv2d_nchw[5] = 0.000000e+00f;
   conv2d_nchw[6] = 0.000000e+00f;
-  conv2d_nchw[7] = 0.000000e+00f;
-  conv2d_nchw[8] = 0.000000e+00f;
-  conv2d_nchw[9] = 0.000000e+00f;
-  conv2d_nchw[10] = 0.000000e+00f;
-  conv2d_nchw[11] = 0.000000e+00f;
-  conv2d_nchw[12] = 0.000000e+00f;
-  conv2d_nchw[13] = 0.000000e+00f;
-  for (int rc_outer_outer = 0; rc_outer_outer &lt; 64; ++rc_outer_outer) {
+  for (int rc_outer_outer = 0; rc_outer_outer &lt; 32; ++rc_outer_outer) {
     for (int ry_outer_outer = 0; ry_outer_outer &lt; 3; ++ry_outer_outer) {
       __syncthreads();
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) * 4) % 9))) &amp;&amp; (((((int)threadIdx.x) * 4) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
-      }
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 1) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 1) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
-      }
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 2) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 2) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[((int)threadIdx.x)] = (((((1 &lt;= ((((int)threadIdx.x) / 9) + ry_outer_outer)) &amp;&amp; (((((int)threadIdx.x) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((int)threadIdx.x) % 9))) &amp;&amp; ((((int)threadIdx.x) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 56)] = (((((1 &lt;= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 2) % 9))) &amp;&amp; (((((int)threadIdx.x) + 2) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 56) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 112)] = (((((1 &lt;= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 4) % 9))) &amp;&amp; (((((int)threadIdx.x) + 4) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 112) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 168)] = (((((1 &lt;= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 6) % 9))) &amp;&amp; (((((int)threadIdx.x) + 6) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 168) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 224)] = (((((1 &lt;= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 8) % 9))) &amp;&amp; (((((int)threadIdx.x) + 8) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 224) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 280)] = (((((1 &lt;= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 1) % 9))) &amp;&amp; (((((int)threadIdx.x) + 1) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 280) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 336)] = (((((1 &lt;= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 3) % 9))) &amp;&amp; (((((int)threadIdx.x) + 3) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 336) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 392)] = (((((1 &lt;= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 5) % 9))) &amp;&amp; (((((int)threadIdx.x) + 5) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 392) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 448)] = (((((1 &lt;= (((((int)threadIdx.x) + 7) / 9) + ry_outer_outer)) &amp;&amp; ((((((int)threadIdx.x) + 7) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 7) % 9))) &amp;&amp; (((((int)threadIdx.x) + 7) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 448) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 504)] = (((((1 &lt;= ((((int)threadIdx.x) / 9) + ry_outer_outer)) &amp;&amp; (((((int)threadIdx.x) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((int)threadIdx.x) % 9))) &amp;&amp; ((((int)threadIdx.x) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) + 384)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 560)] = (((((1 &lt;= ((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 56) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 2) % 9))) &amp;&amp; (((((int)threadIdx.x) + 2) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 560) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 2) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 616)] = (((((1 &lt;= ((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 49) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 4) % 9))) &amp;&amp; (((((int)threadIdx.x) + 4) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 616) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 4) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 672)] = (((((1 &lt;= ((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 42) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 6) % 9))) &amp;&amp; (((((int)threadIdx.x) + 6) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 672) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 6) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 728)] = (((((1 &lt;= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 8) % 9))) &amp;&amp; (((((int)threadIdx.x) + 8) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 728) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 784)] = (((((1 &lt;= ((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 28) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 1) % 9))) &amp;&amp; (((((int)threadIdx.x) + 1) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 784) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 1) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 840)] = (((((1 &lt;= ((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 21) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 3) % 9))) &amp;&amp; (((((int)threadIdx.x) + 3) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 840) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 3) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 896)] = (((((1 &lt;= ((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 14) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 5) % 9))) &amp;&amp; (((((int)threadIdx.x) + 5) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 896) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 5) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[(((int)threadIdx.x) + 952)] = (((((1 &lt;= (((((int)threadIdx.x) + 7) / 9) + ry_outer_outer)) &amp;&amp; ((((((int)threadIdx.x) + 7) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 7) % 9))) &amp;&amp; (((((int)threadIdx.x) + 7) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 784) + (((((int)threadIdx.x) + 952) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
+      kernel_shared[((int)threadIdx.x)] = kernel[((((((((int)blockIdx.x) * 36864) + ((((int)threadIdx.x) / 48) * 4608)) + (rc_outer_outer * 144)) + (((((int)threadIdx.x) % 48) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 56)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 56) / 48) * 4608)) + (rc_outer_outer * 144)) + ((((((int)threadIdx.x) + 8) % 48) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 112)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 112) / 48) * 4608)) + (rc_outer_outer * 144)) + ((((((int)threadIdx.x) + 16) % 48) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 168)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 168) / 48) * 4608)) + (rc_outer_outer * 144)) + ((((((int)threadIdx.x) / 3) + 8) &amp; 15) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 224)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 224) / 48) * 4608)) + (rc_outer_outer * 144)) + ((((((int)threadIdx.x) + 32) % 48) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 280)] = kernel[((((((((int)blockIdx.x) * 36864) + (((((int)threadIdx.x) + 280) / 48) * 4608)) + (rc_outer_outer * 144)) + ((((((int)threadIdx.x) + 40) % 48) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      if (((int)threadIdx.x) &lt; 48) {
+        kernel_shared[(((int)threadIdx.x) + 336)] = kernel[((((((((int)blockIdx.x) * 36864) + (rc_outer_outer * 144)) + ((((int)threadIdx.x) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 32256)];
       }
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 3) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 3) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
-      }
-      kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
-      kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
-      kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
-      kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
-      kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
-      kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
-      kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
-      kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
-      kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
-      kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
-      kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
-      kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
-      kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
-      kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
-      kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
-      kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
       __syncthreads();
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      for (int rc_outer_inner = 0; rc_outer_inner &lt; 16; ++rc_outer_inner) {
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 63) + (((int)threadIdx.x) % 7))] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 9)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 10)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 11)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 18)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 19)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 20)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 27)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 28)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 29)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 36)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 37)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 38)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 45)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 46)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 47)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 54)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
+        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 55)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
+        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + (((int)threadIdx.x) % 7)) + 56)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
+      }
     }
   }
-  for (int i1_inner = 0; i1_inner &lt; 2; ++i1_inner) {
-    for (int i3_inner = 0; i3_inner &lt; 7; ++i3_inner) {
-      compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
-    }
+  for (int i2_inner = 0; i2_inner &lt; 7; ++i2_inner) {
+    compute[((((((int)blockIdx.x) * 392) + ((((int)threadIdx.x) / 7) * 49)) + (i2_inner * 7)) + (((int)threadIdx.x) % 7))] = max((conv2d_nchw[i2_inner] + bias[((((int)blockIdx.x) * 8) + (((int)threadIdx.x) / 7))]), 0.000000e+00f);
   }
 }
 </pre></div>
@@ -1562,7 +829,7 @@ In the example below we resume the status and do more 5 trials.</p>
 Get devices for measurement successfully!
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  11.755 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  14.483 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e3e540f3b477c0c52d8eb73e674e8ffd/tune_conv2d_layer_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_conv2d_layer_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
index 9a06d5ffd..f03172daa 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
@@ -901,7 +901,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  10.0768      10.1030      10.1199      10.0076       0.0494
+   9.8601       9.8492       9.8942       9.8368       0.0246
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
index 70556536e..8f37b240e 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
@@ -920,7 +920,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  760.8700     760.6951     761.9084     760.0065      0.7862
+  768.2140     768.1146     768.6868     767.8406      0.3525
 </pre></div>
 </div>
 </div>
@@ -942,7 +942,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
 with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>.</p></li>
 </ol>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  22.220 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  23.600 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-x86-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e416b94ca1090b0897c0f6e0df95b911/tune_network_x86.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_x86.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
index 8168cf86b..54ff7cd44 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
@@ -620,77 +620,75 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
              placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
              compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
   buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-  preflattened_buffer_map = {placeholder_8: placeholder_15: Buffer(placeholder_13, int32, [33], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_9: placeholder_17: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_7: placeholder_18: Buffer(placeholder_12, int32, [4916], []), placeholder_6: placeholder_19: Buffer(placeholder_11, float32, [4916, 16, 1], [])} {
-  for (i0.outer.i1.outer.fused: int32, 0, 128) &quot;parallel&quot; {
-    allocate(compute_4: Pointer(global float32), float32, [512]), storage_scope = global {
-      for (i.outer.inner: int32, 0, 2) {
-        for (nb_j.inner: int32, 0, 2) {
-          for (i.inner.init: int32, 0, 8) {
-            let cse_var_1: int32 = (((i.outer.inner*256) + (i.inner.init*32)) + (nb_j.inner*16))
-             {
-              compute_5: Buffer(compute_4, float32, [512], [])[cse_var_1] = 0f32
-              compute_5[(cse_var_1 + 1)] = 0f32
-              compute_5[(cse_var_1 + 2)] = 0f32
-              compute_5[(cse_var_1 + 3)] = 0f32
-              compute_5[(cse_var_1 + 4)] = 0f32
-              compute_5[(cse_var_1 + 5)] = 0f32
-              compute_5[(cse_var_1 + 6)] = 0f32
-              compute_5[(cse_var_1 + 7)] = 0f32
-              compute_5[(cse_var_1 + 8)] = 0f32
-              compute_5[(cse_var_1 + 9)] = 0f32
-              compute_5[(cse_var_1 + 10)] = 0f32
-              compute_5[(cse_var_1 + 11)] = 0f32
-              compute_5[(cse_var_1 + 12)] = 0f32
-              compute_5[(cse_var_1 + 13)] = 0f32
-              compute_5[(cse_var_1 + 14)] = 0f32
-              compute_5[(cse_var_1 + 15)] = 0f32
-            }
+  preflattened_buffer_map = {placeholder_7: placeholder_15: Buffer(placeholder_12, int32, [4916], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_9: placeholder_18: Buffer(placeholder_14, float32, [128, 512], []), placeholder_6: placeholder_19: Buffer(placeholder_11, float32, [4916, 16, 1], [])} {
+  for (i0.outer.i1.outer.fused: int32, 0, 256) &quot;parallel&quot; {
+    allocate(compute_4: Pointer(global float32), float32, [256]), storage_scope = global {
+      for (nb_j.inner: int32, 0, 2) {
+        for (i.inner.init: int32, 0, 8) {
+          let cse_var_1: int32 = ((i.inner.init*32) + (nb_j.inner*16))
+           {
+            compute_5: Buffer(compute_4, float32, [256], [])[cse_var_1] = 0f32
+            compute_5[(cse_var_1 + 1)] = 0f32
+            compute_5[(cse_var_1 + 2)] = 0f32
+            compute_5[(cse_var_1 + 3)] = 0f32
+            compute_5[(cse_var_1 + 4)] = 0f32
+            compute_5[(cse_var_1 + 5)] = 0f32
+            compute_5[(cse_var_1 + 6)] = 0f32
+            compute_5[(cse_var_1 + 7)] = 0f32
+            compute_5[(cse_var_1 + 8)] = 0f32
+            compute_5[(cse_var_1 + 9)] = 0f32
+            compute_5[(cse_var_1 + 10)] = 0f32
+            compute_5[(cse_var_1 + 11)] = 0f32
+            compute_5[(cse_var_1 + 12)] = 0f32
+            compute_5[(cse_var_1 + 13)] = 0f32
+            compute_5[(cse_var_1 + 14)] = 0f32
+            compute_5[(cse_var_1 + 15)] = 0f32
           }
-          for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
-            for (i.inner: int32, 0, 8) {
-              let cse_var_21: int32 = (elem_idx*16)
-              let cse_var_20: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
-              let cse_var_19: int32 = (((i.outer.inner*256) + (i.inner*32)) + (nb_j.inner*16))
-              let cse_var_18: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i.outer.inner*2048)) + (i.inner*256))
-              let cse_var_17: int32 = (cse_var_19 + 9)
-              let cse_var_16: int32 = (cse_var_19 + 8)
-              let cse_var_15: int32 = (cse_var_19 + 7)
-              let cse_var_14: int32 = (cse_var_19 + 6)
-              let cse_var_13: int32 = (cse_var_19 + 5)
-              let cse_var_12: int32 = (cse_var_19 + 4)
-              let cse_var_11: int32 = (cse_var_19 + 3)
-              let cse_var_10: int32 = (cse_var_19 + 2)
-              let cse_var_9: int32 = (cse_var_19 + 15)
-              let cse_var_8: int32 = (cse_var_19 + 14)
-              let cse_var_7: int32 = (cse_var_19 + 13)
-              let cse_var_6: int32 = (cse_var_19 + 12)
-              let cse_var_5: int32 = (cse_var_19 + 11)
-              let cse_var_4: int32 = (cse_var_19 + 10)
-              let cse_var_3: int32 = (cse_var_19 + 1)
-               {
-                compute_5[cse_var_19] = (compute_5[cse_var_19] + (placeholder_1[((placeholder_3[cse_var_20]*16) + cse_var_21)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-                compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_20]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_20] + elem_idx)])], 0f32)))
-              }
+        }
+        for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
+          for (i.inner: int32, 0, 8) {
+            let cse_var_21: int32 = (elem_idx*16)
+            let cse_var_20: int32 = ((i.inner*32) + (nb_j.inner*16))
+            let cse_var_19: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
+            let cse_var_18: int32 = ((floordiv(i0.outer.i1.outer.fused, 16)*2048) + (i.inner*256))
+            let cse_var_17: int32 = (cse_var_20 + 9)
+            let cse_var_16: int32 = (cse_var_20 + 8)
+            let cse_var_15: int32 = (cse_var_20 + 7)
+            let cse_var_14: int32 = (cse_var_20 + 6)
+            let cse_var_13: int32 = (cse_var_20 + 5)
+            let cse_var_12: int32 = (cse_var_20 + 4)
+            let cse_var_11: int32 = (cse_var_20 + 3)
+            let cse_var_10: int32 = (cse_var_20 + 2)
+            let cse_var_9: int32 = (cse_var_20 + 15)
+            let cse_var_8: int32 = (cse_var_20 + 14)
+            let cse_var_7: int32 = (cse_var_20 + 13)
+            let cse_var_6: int32 = (cse_var_20 + 12)
+            let cse_var_5: int32 = (cse_var_20 + 11)
+            let cse_var_4: int32 = (cse_var_20 + 10)
+            let cse_var_3: int32 = (cse_var_20 + 1)
+             {
+              compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[((placeholder_3[cse_var_19]*16) + cse_var_21)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+              compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
             }
           }
         }
       }
-      for (i0.inner: int32, 0, 16) {
-        let cse_var_22: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*8192) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
+      for (i0.inner: int32, 0, 8) {
+        let cse_var_22: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*4096) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
         compute[ramp(cse_var_22, 1, 32)] = max((compute_5[ramp((i0.inner*32), 1, 32)] + placeholder_4[ramp(cse_var_22, 1, 32)]), broadcast(0f32, 32))
       }
     }
@@ -729,7 +727,7 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.861 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.795 ms
 </pre></div>
 </div>
 <div class="admonition note">
diff --git a/docs/how_to/tune_with_autotvm/sg_execution_times.html b/docs/how_to/tune_with_autotvm/sg_execution_times.html
index e711adc56..07207687c 100644
--- a/docs/how_to/tune_with_autotvm/sg_execution_times.html
+++ b/docs/how_to/tune_with_autotvm/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autotvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:43.743</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
+<p><strong>00:44.036</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -331,7 +331,7 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></td>
-<td><p>00:43.708</p></td>
+<td><p>00:44.000</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_relay_x86.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-x86-py"><span class="std std-ref">Auto-tuning a Convolutional Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_x86.py</span></code>)</p></td>
diff --git a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
index 5a523982b..925c70d56 100644
--- a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
+++ b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
@@ -1167,8 +1167,8 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 4, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 1, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2885496
-No: 6   GFLOPS: 42.35/42.35     result: MeasureResult(costs=(0.005466809578947368,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.624403715133667, timestamp=1657994920.9398527)        [(&#39;tile_f&#39;, [-1, 1, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3754080
-No: 7   GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 6   GFLOPS: 96.87/96.87     result: MeasureResult(costs=(0.00238975975,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6457629203796387, timestamp=1658166893.5703437)      [(&#39;tile_f&#39;, [-1, 1, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3754080
+No: 7   GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1291,7 +1291,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 16, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 256, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6225319
-No: 8   GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 8   GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1414,7 +1414,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 64]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,943546
-No: 9   GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 9   GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1537,7 +1537,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 16, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 16, 32]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2868708
-No: 10  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 10  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 142, in build
     res = future.result()
   File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 435, in result
@@ -1555,7 +1555,7 @@ No: 10  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
 TimeoutError
 
         [(&#39;tile_f&#39;, [-1, 32, 2, 4]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4691833
-No: 11  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 11  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1678,7 +1678,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 2, 64]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,1042124
-No: 12  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 12  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1801,7 +1801,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 1, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 32, 16]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,10013405
-No: 13  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 13  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1924,7 +1924,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 8, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6732082
-No: 14  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 14  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2047,7 +2047,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 4, 32]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7536735
-No: 15  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 15  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2170,7 +2170,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 128, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,482121
-No: 16  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 16  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2293,7 +2293,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 16]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 32, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2824525
-No: 17  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 17  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2416,7 +2416,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4559286
-No: 18  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 18  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2539,7 +2539,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 32, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 512]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9677544
-No: 19  GFLOPS: 0.00/42.35      result: Traceback (most recent call last):
+No: 19  GFLOPS: 0.00/96.87      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 738, in __call__
     yield remote, remote.load_module(os.path.split(build_result.filename)[1])
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 702, in run_through_rpc
@@ -2627,7 +2627,7 @@ tvm._ffi.base.TVMError: Traceback (most recent call last):
   15: _PyEval_EvalFrameDefault
   14: 0x0000000000537c30
   13: _PyObject_FastCallKeywords
-  12: 0x00007f3cfa5bffa2
+  12: 0x00007f51fe81dfa2
   11: _ctypes_callproc
   10: ffi_call
   9: ffi_call_unix64
@@ -2692,7 +2692,7 @@ Traceback (most recent call last):
   21: _PyFunction_FastCallKeywords
   20: _PyEval_EvalFrameDefault
   19: _PyFunction_FastCall      [(&#39;tile_f&#39;, [-1, 8, 2, 16]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6390073
-No: 20  GFLOPS: 144.60/144.60   result: MeasureResult(costs=(0.00160099481,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.429844617843628, timestamp=1657994947.512822)        [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
+No: 20  GFLOPS: 144.94/144.94   result: MeasureResult(costs=(0.00159719953,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4513959884643555, timestamp=1658166920.2266512)      [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
 </pre></div>
 </div>
 <p>Finally we can inspect the best config from log file, check correctness,
@@ -2733,7 +2733,7 @@ and measure running time.</p>
 Best config:
 [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
 Finish loading 20 records
-Time cost of this operator: 0.002011
+Time cost of this operator: 0.002046
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autotvm-tune-conv2d-cuda-py">
diff --git a/docs/how_to/work_with_microtvm/micro_autotune.html b/docs/how_to/work_with_microtvm/micro_autotune.html
index 44a858518..8b956b154 100644
--- a/docs/how_to/work_with_microtvm/micro_autotune.html
+++ b/docs/how_to/work_with_microtvm/micro_autotune.html
@@ -578,10 +578,10 @@ the tuned operator.</p>
 ########## Build without Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  312.9     98.721   (1, 2, 10, 10, 3)  2       1        [312.9]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.094     0.976    (1, 6, 10, 10)     1       1        [3.094]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.961     0.303    (1, 1, 10, 10, 3)  1       1        [0.961]
-Total_time                                    -                                             316.954   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  309.4     98.727   (1, 2, 10, 10, 3)  2       1        [309.4]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.056     0.975    (1, 6, 10, 10)     1       1        [3.056]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.933     0.298    (1, 1, 10, 10, 3)  1       1        [0.933]
+Total_time                                    -                                             313.389   -        -                  -       -        -
 </pre></div>
 </div>
 </div>
@@ -634,10 +634,10 @@ Total_time                                    -
 ########## Build with Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  118.4     97.735   (1, 6, 10, 10, 1)  2       1        [118.4]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.772     1.462    (1, 6, 10, 10)     1       1        [1.772]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.972     0.802    (1, 1, 10, 10, 3)  1       1        [0.972]
-Total_time                                    -                                             121.144   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  120.7     97.674   (1, 6, 10, 10, 1)  2       1        [120.7]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.943     1.572    (1, 6, 10, 10)     1       1        [1.943]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.931     0.754    (1, 1, 10, 10, 3)  1       1        [0.931]
+Total_time                                    -                                             123.574   -        -                  -       -        -
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-autotune-py">
diff --git a/docs/how_to/work_with_microtvm/micro_train.html b/docs/how_to/work_with_microtvm/micro_train.html
index f44d6f44b..c74a4a356 100644
--- a/docs/how_to/work_with_microtvm/micro_train.html
+++ b/docs/how_to/work_with_microtvm/micro_train.html
@@ -510,7 +510,7 @@ take about <strong>2 minutes</strong> to download the Stanford Cars, while COCO
 <a href="https://docs.python.org/3/library/shutil.html#shutil.move" title="shutil.move" class="sphx-glr-backref-module-shutil sphx-glr-backref-type-py-function"><span class="n">shutil</span><span class="o">.</span><span class="n">move</span></a><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-typ [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmp6zkkzdua/images/random&#39;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmpd9td4hce/images/random&#39;
 </pre></div>
 </div>
 </div>
@@ -570,8 +570,8 @@ objects to other stuff? We can display some examples from our datasets using <co
     <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s2">&quot;off&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmp6zkkzdua/images/target contains 8144 images
-/tmp/tmp6zkkzdua/images/random contains 5000 images
+<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmpd9td4hce/images/target contains 8144 images
+/tmp/tmpd9td4hce/images/random contains 5000 images
 </pre></div>
 </div>
 </div>
@@ -683,13 +683,13 @@ the time on our validation set).</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Epoch 1/3
-328/328 - 55s - loss: 0.2461 - accuracy: 0.9179 - val_loss: 0.1287 - val_accuracy: 0.9607
+328/328 - 54s - loss: 0.2192 - accuracy: 0.9238 - val_loss: 0.1492 - val_accuracy: 0.9551
 Epoch 2/3
-328/328 - 52s - loss: 0.1059 - accuracy: 0.9601 - val_loss: 0.1205 - val_accuracy: 0.9649
+328/328 - 50s - loss: 0.1006 - accuracy: 0.9611 - val_loss: 0.1121 - val_accuracy: 0.9641
 Epoch 3/3
-328/328 - 52s - loss: 0.0725 - accuracy: 0.9718 - val_loss: 0.1118 - val_accuracy: 0.9641
+328/328 - 51s - loss: 0.0702 - accuracy: 0.9738 - val_loss: 0.1290 - val_accuracy: 0.9547
 
-&lt;keras.callbacks.History object at 0x7f053fd2ad50&gt;
+&lt;keras.callbacks.History object at 0x7f397ddf9090&gt;
 </pre></div>
 </div>
 </div>
@@ -951,7 +951,7 @@ as intended.</p>
 <p>From here, we could modify the model to read live images from the camera - we have another
 Arduino tutorial for how to do that <a class="reference external" href="https://github.com/guberti/tvm-arduino-demos/tree/master/examples/person_detection">on GitHub</a>. Alternatively, we could also
 <a class="reference external" href="https://tvm.apache.org/docs/how_to/work_with_microtvm/micro_autotune.html">use TVM’s autotuning capabilities</a> to dramatically improve the model’s performance.</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  40.304 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  52.807 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-train-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/b52cec46baf4f78d6bcd94cbe269c8a6/micro_train.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_train.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/sg_execution_times.html b/docs/how_to/work_with_microtvm/sg_execution_times.html
index 4318895d9..49c0dc1d1 100644
--- a/docs/how_to/work_with_microtvm/sg_execution_times.html
+++ b/docs/how_to/work_with_microtvm/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-microtvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:29.397</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
+<p><strong>05:40.707</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,15 +331,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_train.html#sphx-glr-how-to-work-with-microtvm-micro-train-py"><span class="std std-ref">Training Vision Models for microTVM on Arduino</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_train.py</span></code>)</p></td>
-<td><p>04:40.304</p></td>
+<td><p>04:52.807</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_autotune.html#sphx-glr-how-to-work-with-microtvm-micro-autotune-py"><span class="std std-ref">Autotuning with microTVM</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_autotune.py</span></code>)</p></td>
-<td><p>00:45.646</p></td>
+<td><p>00:44.473</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_tflite.html#sphx-glr-how-to-work-with-microtvm-micro-tflite-py"><span class="std std-ref">microTVM with TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tflite.py</span></code>)</p></td>
-<td><p>00:03.446</p></td>
+<td><p>00:03.425</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_ethosu.html#sphx-glr-how-to-work-with-microtvm-micro-ethosu-py"><span class="std std-ref">Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_ethosu.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_relay/sg_execution_times.html b/docs/how_to/work_with_relay/sg_execution_times.html
index c990260b4..5273a864a 100644
--- a/docs/how_to/work_with_relay/sg_execution_times.html
+++ b/docs/how_to/work_with_relay/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-relay-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:11.496</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
+<p><strong>00:11.637</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,11 +331,11 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="using_external_lib.html#sphx-glr-how-to-work-with-relay-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></td>
-<td><p>00:09.940</p></td>
+<td><p>00:10.115</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="build_gcn.html#sphx-glr-how-to-work-with-relay-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></td>
-<td><p>00:01.549</p></td>
+<td><p>00:01.516</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="using_relay_viz.html#sphx-glr-how-to-work-with-relay-using-relay-viz-py"><span class="std std-ref">Use Relay Visualizer to Visualize Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_relay_viz.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/intrin_math.html b/docs/how_to/work_with_schedules/intrin_math.html
index 541daf783..c7d865d67 100644
--- a/docs/how_to/work_with_schedules/intrin_math.html
+++ b/docs/how_to/work_with_schedules/intrin_math.html
@@ -517,7 +517,7 @@ The following example customizes CUDA lowering rule for <code class="code docuti
 <a href="../../reference/api/python/ir.html#tvm.ir.register_intrin_lowering" title="tvm.ir.register_intrin_lowering" class="sphx-glr-backref-module-tvm-ir sphx-glr-backref-type-py-function"><span class="n">register_intrin_lowering</span></a><span class="p">(</span><span class="s2">&quot;tir.exp&quot;</span><span class="p">,</span> <span class="n">target</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">f</span><span class="o">= [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f04c30e39e0&gt;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f38e432ab00&gt;
 </pre></div>
 </div>
 <p>Register the rule to TVM with override option to override existing rule.
diff --git a/docs/how_to/work_with_schedules/sg_execution_times.html b/docs/how_to/work_with_schedules/sg_execution_times.html
index 7257b8ad5..d8f34a6b6 100644
--- a/docs/how_to/work_with_schedules/sg_execution_times.html
+++ b/docs/how_to/work_with_schedules/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-schedules-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:04.053</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
+<p><strong>00:04.378</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,23 +331,23 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="intrin_math.html#sphx-glr-how-to-work-with-schedules-intrin-math-py"><span class="std std-ref">Intrinsics and Math Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">intrin_math.py</span></code>)</p></td>
-<td><p>00:01.873</p></td>
+<td><p>00:02.027</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tensorize.html#sphx-glr-how-to-work-with-schedules-tensorize-py"><span class="std std-ref">Use Tensorize to Leverage Hardware Intrinsics</span></a> (<code class="docutils literal notranslate"><span class="pre">tensorize.py</span></code>)</p></td>
-<td><p>00:00.967</p></td>
+<td><p>00:01.049</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="reduction.html#sphx-glr-how-to-work-with-schedules-reduction-py"><span class="std std-ref">Reduction</span></a> (<code class="docutils literal notranslate"><span class="pre">reduction.py</span></code>)</p></td>
-<td><p>00:00.523</p></td>
+<td><p>00:00.567</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="scan.html#sphx-glr-how-to-work-with-schedules-scan-py"><span class="std std-ref">Scan and Recurrent Kernel</span></a> (<code class="docutils literal notranslate"><span class="pre">scan.py</span></code>)</p></td>
-<td><p>00:00.507</p></td>
+<td><p>00:00.550</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="extern_op.html#sphx-glr-how-to-work-with-schedules-extern-op-py"><span class="std std-ref">External Tensor Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">extern_op.py</span></code>)</p></td>
-<td><p>00:00.100</p></td>
+<td><p>00:00.102</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="schedule_primitives.html#sphx-glr-how-to-work-with-schedules-schedule-primitives-py"><span class="std std-ref">Schedule Primitives in TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">schedule_primitives.py</span></code>)</p></td>
@@ -355,7 +355,7 @@
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tedd.html#sphx-glr-how-to-work-with-schedules-tedd-py"><span class="std std-ref">Use Tensor Expression Debug Display (TEDD) for Visualization</span></a> (<code class="docutils literal notranslate"><span class="pre">tedd.py</span></code>)</p></td>
-<td><p>00:00.027</p></td>
+<td><p>00:00.029</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tuple_inputs.html#sphx-glr-how-to-work-with-schedules-tuple-inputs-py"><span class="std std-ref">Compute and Reduce with Tuple Inputs</span></a> (<code class="docutils literal notranslate"><span class="pre">tuple_inputs.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/tensorize.html b/docs/how_to/work_with_schedules/tensorize.html
index dae6ebdb4..75d7578fd 100644
--- a/docs/how_to/work_with_schedules/tensorize.html
+++ b/docs/how_to/work_with_schedules/tensorize.html
@@ -572,7 +572,7 @@ The importing needs to happen before the tensorized GEMV being executed.</p>
              C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
   buffer_map = {A_1: A, B_1: B, C_1: C}
   preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmptfa1kiz5/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmptfa1kiz5/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
+  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmpknbs3_1z/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpknbs3_1z/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
   for (i, 0, 1024) {
     for (j.outer: int32, 0, 32) {
       @tir.call_extern(&quot;gemv_update&quot;, @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/reference/api/doxygen/affine__type_8h_source.html b/docs/reference/api/doxygen/affine__type_8h_source.html
index bc1dedea8..4f0c195dd 100644
--- a/docs/reference/api/doxygen/affine__type_8h_source.html
+++ b/docs/reference/api/doxygen/affine__type_8h_source.html
@@ -93,7 +93,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1TensorAffineTypeNode_html_a45fefe29872d61434bd7c7f01cd98536"><div class="ttname"><a href="classtvm_1_1TensorAffineTypeNode.html#a45fefe29872d61434bd7c7f01cd98536">tvm::TensorAffineTypeNode::zero_point</a></div><div class="ttdeci">RelayExpr zero_point</div><div class="ttdoc">The zero point of this type. </div><div class="ttdef"><b>Definition:</b> affine_type.h:71</div></div>
 <div class="ttc" id="classtvm_1_1AffineTypeNode_html_a8064dec5da4e223b235c14e4ca72d06b"><div class="ttname"><a href="classtvm_1_1AffineTypeNode.html#a8064dec5da4e223b235c14e4ca72d06b">tvm::AffineTypeNode::_type_has_method_shash_reduce</a></div><div class="ttdeci">static constexpr const bool _type_has_method_shash_reduce</div><div class="ttdef"><b>Definition:</b> affine_type.h:46</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
 <div class="ttc" id="classtvm_1_1TupleAffineTypeNode_html_a789274829a01ffd4ea646575c064f1c9"><div class="ttname"><a href="classtvm_1_1TupleAffineTypeNode.html#a789274829a01ffd4ea646575c064f1c9">tvm::TupleAffineTypeNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const TupleAffineTypeNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> affine_type.h:124</div></div>
 <div class="ttc" id="classtvm_1_1TensorAffineTypeNode_html_a1f64785eec11b404bb9004edef21a63a"><div class="ttname"><a href="classtvm_1_1TensorAffineTypeNode.html#a1f64785eec11b404bb9004edef21a63a">tvm::TensorAffineTypeNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> affine_type.h:90</div></div>
diff --git a/docs/reference/api/doxygen/analyzer_8h_source.html b/docs/reference/api/doxygen/analyzer_8h_source.html
index 0e69ab3ca..b7beb2991 100644
--- a/docs/reference/api/doxygen/analyzer_8h_source.html
+++ b/docs/reference/api/doxygen/analyzer_8h_source.html
@@ -84,7 +84,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1Var_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Var.html">tvm::tir::Var</a></div><div class="ttdoc">a named variable in TIR </div><div class="ttdef"><b>Definition:</b> var.h:88</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></div><div class="ttdoc">base class of all object containers. </div><div class="ttdef"><b>Definition:</b> object.h:167</div></div>
 <div class="ttc" id="classtvm_1_1AttrVisitor_html"><div class="ttname"><a href="classtvm_1_1AttrVisitor.html">tvm::AttrVisitor</a></div><div class="ttdoc">Visitor class to get the attributes of an AST/IR node. The content is going to be called for each fie...</div><div class="ttdef"><b>Definition:</b> reflection.h:52</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1ModularSet_html"><div class="ttname"><a href="classtvm_1_1arith_1_1ModularSet.html">tvm::arith::ModularSet</a></div><div class="ttdoc">reference of ModularSetNode </div><div class="ttdef"><b>Definition:</b> analyzer.h:204</div></div>
 <div class="ttc" id="namespacetvm_1_1arith_html_ac6a38da661cd3681eb85abe1cd810422aad0250170b362173e1e2a2e3a6f13d20"><div class="ttname"><a href="namespacetvm_1_1arith.html#ac6a38da661cd3681eb85abe1cd810422aad0250170b362173e1e2a2e3a6f13d20">tvm::arith::kFloorDiv</a></div><div class="ttdoc">Floor division. </div><div class="ttdef"><b>Definition:</b> analyzer.h:59</div></div>
 <div class="ttc" id="namespacetvm_1_1arith_html_ac6a38da661cd3681eb85abe1cd810422aa6da1e276448319d25de8fe181872432"><div class="ttname"><a href="namespacetvm_1_1arith.html#ac6a38da661cd3681eb85abe1cd810422aa6da1e276448319d25de8fe181872432">tvm::arith::kTruncDiv</a></div><div class="ttdoc">Truncated division. </div><div class="ttdef"><b>Definition:</b> analyzer.h:57</div></div>
diff --git a/docs/reference/api/doxygen/buffer_8h_source.html b/docs/reference/api/doxygen/buffer_8h_source.html
index bd9ef0ab4..de4f39a0e 100644
--- a/docs/reference/api/doxygen/buffer_8h_source.html
+++ b/docs/reference/api/doxygen/buffer_8h_source.html
@@ -101,7 +101,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_aebad9f7235dd20af649fb5c2113797b8"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#aebad9f7235dd20af649fb5c2113797b8">tvm::runtime::DataType::Handle</a></div><div class="ttdeci">static DataType Handle(int bits=64, int lanes=1)</div><div class="ttdoc">Construct a handle type. </div><div class="ttdef"><b>Definition:</b> data_type.h:188</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BufferNode_html_a1abac917e1de0b3c43774ee94477016b"><div class="ttname"><a href="classtvm_1_1tir_1_1BufferNode.html#a1abac917e1de0b3c43774ee94477016b">tvm::tir::BufferNode::BufferNode</a></div><div class="ttdeci">BufferNode()</div><div class="ttdoc">constructor </div><div class="ttdef"><b>Definition:</b> buffer.h:99</div></div>
-<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:304</div></div>
+<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:518</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Stmt_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Stmt.html">tvm::tir::Stmt</a></div><div class="ttdoc">Container of all statements. </div><div class="ttdef"><b>Definition:</b> stmt.h:57</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
diff --git a/docs/reference/api/doxygen/constant__utils_8h_source.html b/docs/reference/api/doxygen/constant__utils_8h_source.html
index 32f432641..b4816e06b 100644
--- a/docs/reference/api/doxygen/constant__utils_8h_source.html
+++ b/docs/reference/api/doxygen/constant__utils_8h_source.html
@@ -71,12 +71,12 @@ $(function() {
 <div class="ttc" id="analyzer_8h_html"><div class="ttname"><a href="analyzer_8h.html">analyzer.h</a></div><div class="ttdoc">Algebra expression simplifications. </div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1Analyzer_html_a9b440f852f12ad0a4d8ed5ed97054425"><div class="ttname"><a href="classtvm_1_1arith_1_1Analyzer.html#a9b440f852f12ad0a4d8ed5ed97054425">tvm::arith::Analyzer::Simplify</a></div><div class="ttdeci">PrimExpr Simplify(const PrimExpr &amp;expr, int steps=2)</div><div class="ttdoc">Simplify expr. </div></div>
 <div class="ttc" id="tir_2analysis_8h_html"><div class="ttname"><a href="tir_2analysis_8h.html">analysis.h</a></div><div class="ttdoc">Analysis utilitie and passes for TIR. </div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:275</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:489</div></div>
 <div class="ttc" id="structtvm_1_1tir_1_1ExprDeepEqual_html"><div class="ttname"><a href="structtvm_1_1tir_1_1ExprDeepEqual.html">tvm::tir::ExprDeepEqual</a></div><div class="ttdoc">Compare two expressions recursively and check if they are equal to each other without var remapping...</div><div class="ttdef"><b>Definition:</b> analysis.h:54</div></div>
 <div class="ttc" id="tir_2expr_8h_html"><div class="ttname"><a href="tir_2expr_8h.html">expr.h</a></div><div class="ttdoc">TIR expressions. </div></div>
 <div class="ttc" id="namespacetvm_1_1tir_html_ae8c7db788e840dc1c2ed1f365d5ea829"><div class="ttname"><a href="namespacetvm_1_1tir.html#ae8c7db788e840dc1c2ed1f365d5ea829">tvm::tir::IntImmNode</a></div><div class="ttdeci">tvm::IntImmNode IntImmNode</div><div class="ttdef"><b>Definition:</b> expr.h:49</div></div>
 <div class="ttc" id="operation_8h_html"><div class="ttname"><a href="operation_8h.html">operation.h</a></div><div class="ttdoc">Operation node can generate one or multiple Tensors. </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a5c414d5e54c099ad7287be302aac8f02"><div class="ttname"><a href="namespacetvm_1_1tir.html#a5c414d5e54c099ad7287be302aac8f02">tvm::tir::is_const_int</a></div><div class="ttdeci">bool is_const_int(const PrimExpr &amp;x, int64_t value)</div><div class="ttdoc">Check whether x is a constant integer expression. </div><div class="ttdef"><b>Definition:</b> op.h:1086</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a5c414d5e54c099ad7287be302aac8f02"><div class="ttname"><a href="namespacetvm_1_1tir.html#a5c414d5e54c099ad7287be302aac8f02">tvm::tir::is_const_int</a></div><div class="ttdeci">bool is_const_int(const PrimExpr &amp;x, int64_t value)</div><div class="ttdoc">Check whether x is a constant integer expression. </div><div class="ttdef"><b>Definition:</b> op.h:891</div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1Analyzer_html"><div class="ttname"><a href="classtvm_1_1arith_1_1Analyzer.html">tvm::arith::Analyzer</a></div><div class="ttdoc">Analyzer that contains bunch of sub-analyzers. </div><div class="ttdef"><b>Definition:</b> analyzer.h:423</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
diff --git a/docs/reference/api/doxygen/cuda_2dense_8h_source.html b/docs/reference/api/doxygen/cuda_2dense_8h_source.html
index a5a24865b..d1533bdf3 100644
--- a/docs/reference/api/doxygen/cuda_2dense_8h_source.html
+++ b/docs/reference/api/doxygen/cuda_2dense_8h_source.html
@@ -81,7 +81,7 @@ $(function() {
 <div class="ttc" id="schedule__pass_8h_html"><div class="ttname"><a href="schedule__pass_8h.html">schedule_pass.h</a></div><div class="ttdoc">Collection of Schedule pass functions. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_1_1contrib_html_a68c8492ea536608724ef6267b0785054"><div class="ttname"><a href="namespacetvm_1_1topi_1_1contrib.html#a68c8492ea536608724ef6267b0785054">tvm::topi::contrib::cublas_matmul</a></div><div class="ttdeci">Tensor cublas_matmul(const Tensor &amp;lhs, const Tensor &amp;rhs, bool transa, bool transb)</div><div class="ttdoc">Create an op that multiplies lhs and rhs with cuBLAS. </div><div class="ttdef"><b>Definition:</b> cublas.h:46</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a13aaf23f0ab77f1ed4a7d4b7816bf210"><div class="ttname"><a href="namespacetvm_1_1topi.html#a13aaf23f0ab77f1ed4a7d4b7816bf210">tvm::topi::kBroadcast</a></div><div class="ttdeci">constexpr auto kBroadcast</div><div class="ttdef"><b>Definition:</b> tags.h:36</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_1_1nn_html_a34e1a8305acf89ef2f745c8d99bf8e89"><div class="ttname"><a href="namespacetvm_1_1topi_1_1nn.html#a34e1a8305acf89ef2f745c8d99bf8e89">tvm::topi::nn::dense</a></div><div class="ttdeci">tvm::te::Tensor dense(const tvm::te::Tensor &amp;data, const tvm::te::Tensor &amp;weight, const tvm::te::Tensor &amp;bias, const DataType &amp;out_dtype)</div><div class="ttdoc">Creates an operation that calculates data * weight^T + bias. </div><div class="t [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a17d8d5ad92691f9e18e3e0ae8ef69e4f"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a17d8d5ad92691f9e18e3e0ae8ef69e4f">tvm::runtime::ObjectRef::defined</a></div><div class="ttdeci">bool defined() const</div><div class="ttdef"><b>Definition:</b> object.h:544</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html">tvm::runtime::DataType</a></div><div class="ttdoc">Runtime primitive data type. </div><div class="ttdef"><b>Definition:</b> data_type.h:41</div></div>
diff --git a/docs/reference/api/doxygen/cuda_2injective_8h_source.html b/docs/reference/api/doxygen/cuda_2injective_8h_source.html
index 5a275dfe3..2bf530303 100644
--- a/docs/reference/api/doxygen/cuda_2injective_8h_source.html
+++ b/docs/reference/api/doxygen/cuda_2injective_8h_source.html
@@ -75,8 +75,8 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1IterVar_html"><div class="ttname"><a href="classtvm_1_1tir_1_1IterVar.html">tvm::tir::IterVar</a></div><div class="ttdoc">Iteration Variable, represents an iteration over an integer interval. </div><div class="ttdef"><b>Definition:</b> var.h:301</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aa026b914ee05f81b6c20130b8905f257"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aa026b914ee05f81b6c20130b8905f257">tvm::runtime::Array::push_back</a></div><div class="ttdeci">void push_back(const T &amp;item)</div><div class="ttdoc">push a new item to the back of the list </div><div class="ttdef"><b>Definition:</b> array.h:436</div></div>
 <div class="ttc" id="schedule__pass_8h_html"><div class="ttname"><a href="schedule__pass_8h.html">schedule_pass.h</a></div><div class="ttdoc">Collection of Schedule pass functions. </div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_abae4eff906166502c2a982e8a121d81d"><div class="ttname"><a href="classtvm_1_1Integer.html#abae4eff906166502c2a982e8a121d81d">tvm::Integer::IntValue</a></div><div class="ttdeci">int64_t IntValue() const</div><div class="ttdoc">convert to int64_t </div><div class="ttdef"><b>Definition:</b> expr.h:441</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_abae4eff906166502c2a982e8a121d81d"><div class="ttname"><a href="classtvm_1_1Integer.html#abae4eff906166502c2a982e8a121d81d">tvm::Integer::IntValue</a></div><div class="ttdeci">int64_t IntValue() const</div><div class="ttdoc">convert to int64_t </div><div class="ttdef"><b>Definition:</b> expr.h:655</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_1_1x86_html_afde6f5b6bb1825d127238b9a55a29337"><div class="ttname"><a href="namespacetvm_1_1topi_1_1x86.html#afde6f5b6bb1825d127238b9a55a29337">tvm::topi::x86::schedule_injective_from_existing</a></div><div class="ttdeci">Schedule schedule_injective_from_existing(Schedule sch, const Tensor &amp;out)</div><div class="ttdoc">Updates an existing schedule for the given injective ops. </div><div class="ttdef"><b>Definition:</b> injective.h:47</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1ComputeOpNode_html"><div class="ttname"><a href="classtvm_1_1te_1_1ComputeOpNode.html">tvm::te::ComputeOpNode</a></div><div class="ttdoc">A Compute op that compute a tensor on certain domain. </div><div class="ttdef"><b>Definition:</b> operation.h:226</div></div>
@@ -91,7 +91,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a2d76fa1fb628ff276a284e61123589c5"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a2d76fa1fb628ff276a284e61123589c5">tvm::runtime::ObjectRef::as</a></div><div class="ttdeci">const ObjectType * as() const</div><div class="ttdoc">Try to downcast the internal Object to a raw pointer of a corresponding type. </div><div class="ttdef"><b>Definition:</b> object.h:865</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_1_1x86_html_a9ff21a27a270e187c985a93b565232c4"><div class="ttname"><a href="namespacetvm_1_1topi_1_1x86.html#a9ff21a27a270e187c985a93b565232c4">tvm::topi::x86::schedule_injective</a></div><div class="ttdeci">Schedule schedule_injective(const Target &amp;target, const Array&lt; Tensor &gt; &amp;outs)</div><div class="ttdoc">Create an x86 schedule for the given injective ops. </div><div class="ttdef"><b>Definition:</b> injective.h:68</div></div>
 <div class="ttc" id="generic__func_8h_html"><div class="ttname"><a href="generic__func_8h.html">generic_func.h</a></div><div class="ttdoc">Generic function that can be specialzied on a per target basis. </div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/cuda_2pooling_8h_source.html b/docs/reference/api/doxygen/cuda_2pooling_8h_source.html
index 324599c4c..ecfb06f4e 100644
--- a/docs/reference/api/doxygen/cuda_2pooling_8h_source.html
+++ b/docs/reference/api/doxygen/cuda_2pooling_8h_source.html
@@ -78,8 +78,8 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_1_1rocm_html_a45aee34b0000f98aafd958ffe9baebc0"><div class="ttname"><a href="namespacetvm_1_1topi_1_1rocm.html#a45aee34b0000f98aafd958ffe9baebc0">tvm::topi::rocm::schedule_global_pool</a></div><div class="ttdeci">Schedule schedule_global_pool(const Target &amp;target, const Array&lt; Tensor &gt; &amp;outs)</div><div class="ttdoc">Create a rocm schedule for global_pool. </div><div class="ttdef"><b>Definition:</b> pooling.h:61</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aa026b914ee05f81b6c20130b8905f257"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aa026b914ee05f81b6c20130b8905f257">tvm::runtime::Array::push_back</a></div><div class="ttdeci">void push_back(const T &amp;item)</div><div class="ttdoc">push a new item to the back of the list </div><div class="ttdef"><b>Definition:</b> array.h:436</div></div>
 <div class="ttc" id="schedule__pass_8h_html"><div class="ttname"><a href="schedule__pass_8h.html">schedule_pass.h</a></div><div class="ttdoc">Collection of Schedule pass functions. </div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_abae4eff906166502c2a982e8a121d81d"><div class="ttname"><a href="classtvm_1_1Integer.html#abae4eff906166502c2a982e8a121d81d">tvm::Integer::IntValue</a></div><div class="ttdeci">int64_t IntValue() const</div><div class="ttdoc">convert to int64_t </div><div class="ttdef"><b>Definition:</b> expr.h:441</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_abae4eff906166502c2a982e8a121d81d"><div class="ttname"><a href="classtvm_1_1Integer.html#abae4eff906166502c2a982e8a121d81d">tvm::Integer::IntValue</a></div><div class="ttdeci">int64_t IntValue() const</div><div class="ttdoc">convert to int64_t </div><div class="ttdef"><b>Definition:</b> expr.h:655</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1ComputeOpNode_html"><div class="ttname"><a href="classtvm_1_1te_1_1ComputeOpNode.html">tvm::te::ComputeOpNode</a></div><div class="ttdoc">A Compute op that compute a tensor on certain domain. </div><div class="ttdef"><b>Definition:</b> operation.h:226</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1BaseComputeOpNode_html_a21617a643897727c51ded2b7260df4c3"><div class="ttname"><a href="classtvm_1_1te_1_1BaseComputeOpNode.html#a21617a643897727c51ded2b7260df4c3">tvm::te::BaseComputeOpNode::axis</a></div><div class="ttdeci">Array&lt; IterVar &gt; axis</div><div class="ttdoc">IterVar on each axis. </div><div class="ttdef"><b>Definition:</b> operation.h:207</div></div>
@@ -92,7 +92,7 @@ $(function() {
 <div class="ttc" id="tags_8h_html"><div class="ttname"><a href="tags_8h.html">tags.h</a></div><div class="ttdoc">External function interface to rocBLAS libraries. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a2d76fa1fb628ff276a284e61123589c5"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a2d76fa1fb628ff276a284e61123589c5">tvm::runtime::ObjectRef::as</a></div><div class="ttdeci">const ObjectType * as() const</div><div class="ttdoc">Try to downcast the internal Object to a raw pointer of a corresponding type. </div><div class="ttdef"><b>Definition:</b> object.h:865</div></div>
 <div class="ttc" id="generic__func_8h_html"><div class="ttname"><a href="generic__func_8h.html">generic_func.h</a></div><div class="ttdoc">Generic function that can be specialzied on a per target basis. </div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/cuda_2reduction_8h_source.html b/docs/reference/api/doxygen/cuda_2reduction_8h_source.html
index cb33cbf16..86a0aa03a 100644
--- a/docs/reference/api/doxygen/cuda_2reduction_8h_source.html
+++ b/docs/reference/api/doxygen/cuda_2reduction_8h_source.html
@@ -83,10 +83,10 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aa026b914ee05f81b6c20130b8905f257"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aa026b914ee05f81b6c20130b8905f257">tvm::runtime::Array::push_back</a></div><div class="ttdeci">void push_back(const T &amp;item)</div><div class="ttdoc">push a new item to the back of the list </div><div class="ttdef"><b>Definition:</b> array.h:436</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_a90e90b3f4ba8a590baff78c75807bbc7"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#a90e90b3f4ba8a590baff78c75807bbc7">tvm::runtime::Object::IsInstance</a></div><div class="ttdeci">bool IsInstance() const</div><div class="ttdef"><b>Definition:</b> object.h:829</div></div>
 <div class="ttc" id="schedule__pass_8h_html"><div class="ttname"><a href="schedule__pass_8h.html">schedule_pass.h</a></div><div class="ttdoc">Collection of Schedule pass functions. </div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1OperationNode_html_a9675fbb905d62de5b86624388acec4b1"><div class="ttname"><a href="classtvm_1_1te_1_1OperationNode.html#a9675fbb905d62de5b86624388acec4b1">tvm::te::OperationNode::InputTensors</a></div><div class="ttdeci">virtual Array&lt; Tensor &gt; InputTensors() const =0</div><div class="ttdoc">List all the input Tensors. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aed6387e67d18b9d5ad18f510fd600a25"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aed6387e67d18b9d5ad18f510fd600a25">tvm::runtime::Array::size</a></div><div class="ttdeci">size_t size() const</div><div class="ttdef"><b>Definition:</b> array.h:399</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_abae4eff906166502c2a982e8a121d81d"><div class="ttname"><a href="classtvm_1_1Integer.html#abae4eff906166502c2a982e8a121d81d">tvm::Integer::IntValue</a></div><div class="ttdeci">int64_t IntValue() const</div><div class="ttdoc">convert to int64_t </div><div class="ttdef"><b>Definition:</b> expr.h:441</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_abae4eff906166502c2a982e8a121d81d"><div class="ttname"><a href="classtvm_1_1Integer.html#abae4eff906166502c2a982e8a121d81d">tvm::Integer::IntValue</a></div><div class="ttdeci">int64_t IntValue() const</div><div class="ttdoc">convert to int64_t </div><div class="ttdef"><b>Definition:</b> expr.h:655</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1ComputeOpNode_html"><div class="ttname"><a href="classtvm_1_1te_1_1ComputeOpNode.html">tvm::te::ComputeOpNode</a></div><div class="ttdoc">A Compute op that compute a tensor on certain domain. </div><div class="ttdef"><b>Definition:</b> operation.h:226</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1BaseComputeOpNode_html_a21617a643897727c51ded2b7260df4c3"><div class="ttname"><a href="classtvm_1_1te_1_1BaseComputeOpNode.html#a21617a643897727c51ded2b7260df4c3">tvm::te::BaseComputeOpNode::axis</a></div><div class="ttdeci">Array&lt; IterVar &gt; axis</div><div class="ttdoc">IterVar on each axis. </div><div class="ttdef"><b>Definition:</b> operation.h:207</div></div>
@@ -104,7 +104,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_1_1cuda_html_a9009672dab261008d66d4e59d896935f"><div class="ttname"><a href="namespacetvm_1_1topi_1_1cuda.html#a9009672dab261008d66d4e59d896935f">tvm::topi::cuda::TraverseAfterReduce</a></div><div class="ttdeci">void TraverseAfterReduce(const Target &amp;target, Schedule s, Operation op)</div><div class="ttdoc">Schedule a reduce op, then invoke TraverseBeforeReduce on each of the op&amp;#39;s inputs. </div><div class="ttdef"><b>Definition:</b> re [...]
 <div class="ttc" id="classtvm_1_1te_1_1BaseComputeOpNode_html_ad0df643468fc148d80afd7116abdd2ac"><div class="ttname"><a href="classtvm_1_1te_1_1BaseComputeOpNode.html#ad0df643468fc148d80afd7116abdd2ac">tvm::te::BaseComputeOpNode::reduce_axis</a></div><div class="ttdeci">Array&lt; IterVar &gt; reduce_axis</div><div class="ttdoc">IterVar on each reduction axis, if the body is a Reduce. </div><div class="ttdef"><b>Definition:</b> operation.h:209</div></div>
 <div class="ttc" id="generic__func_8h_html"><div class="ttname"><a href="generic__func_8h.html">generic_func.h</a></div><div class="ttdoc">Generic function that can be specialzied on a per target basis. </div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/cuda_2softmax_8h_source.html b/docs/reference/api/doxygen/cuda_2softmax_8h_source.html
index 5911fd653..4496063b3 100644
--- a/docs/reference/api/doxygen/cuda_2softmax_8h_source.html
+++ b/docs/reference/api/doxygen/cuda_2softmax_8h_source.html
@@ -75,7 +75,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_1_1nn_html_aa2cb22c64412c3eacb351c12b883333b"><div class="ttname"><a href="namespacetvm_1_1topi_1_1nn.html#aa2cb22c64412c3eacb351c12b883333b">tvm::topi::nn::softmax</a></div><div class="ttdeci">Tensor softmax(const Tensor &amp;x, int axis=-1, std::string name=&quot;tensor&quot;, std::string tag=&quot;softmax_output&quot;)</div><div class="ttdoc">Softmax activation. </div><div class="ttdef"><b>Definition:</b> softmax.h:50</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aa026b914ee05f81b6c20130b8905f257"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aa026b914ee05f81b6c20130b8905f257">tvm::runtime::Array::push_back</a></div><div class="ttdeci">void push_back(const T &amp;item)</div><div class="ttdoc">push a new item to the back of the list </div><div class="ttdef"><b>Definition:</b> array.h:436</div></div>
 <div class="ttc" id="schedule__pass_8h_html"><div class="ttname"><a href="schedule__pass_8h.html">schedule_pass.h</a></div><div class="ttdoc">Collection of Schedule pass functions. </div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1ComputeOpNode_html"><div class="ttname"><a href="classtvm_1_1te_1_1ComputeOpNode.html">tvm::te::ComputeOpNode</a></div><div class="ttdoc">A Compute op that compute a tensor on certain domain. </div><div class="ttdef"><b>Definition:</b> operation.h:226</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1BaseComputeOpNode_html_a21617a643897727c51ded2b7260df4c3"><div class="ttname"><a href="classtvm_1_1te_1_1BaseComputeOpNode.html#a21617a643897727c51ded2b7260df4c3">tvm::te::BaseComputeOpNode::axis</a></div><div class="ttdeci">Array&lt; IterVar &gt; axis</div><div class="ttdoc">IterVar on each axis. </div><div class="ttdef"><b>Definition:</b> operation.h:207</div></div>
diff --git a/docs/reference/api/doxygen/dataflow__matcher_8h_source.html b/docs/reference/api/doxygen/dataflow__matcher_8h_source.html
index 8ae6b0f65..0f586229e 100644
--- a/docs/reference/api/doxygen/dataflow__matcher_8h_source.html
+++ b/docs/reference/api/doxygen/dataflow__matcher_8h_source.html
@@ -75,7 +75,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1relay_html_a48108863604dc310eb3184b13b42a92a"><div class="ttname"><a href="namespacetvm_1_1relay.html#a48108863604dc310eb3184b13b42a92a">tvm::relay::PartitionPattern</a></div><div class="ttdeci">Expr PartitionPattern(DFPattern pattern, Expr expr, Map&lt; String, ObjectRef &gt; attrs, PackedFunc check)</div><div class="ttdoc">Partition all matches of a DFPattern inside an Expr into separate Function calls. ...</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1DFPatternCallbackNode_html_a9e5a819e8e816bbacc0e9be93a169ebf"><div class="ttname"><a href="classtvm_1_1relay_1_1DFPatternCallbackNode.html#a9e5a819e8e816bbacc0e9be93a169ebf">tvm::relay::DFPatternCallbackNode::_type_key</a></div><div class="ttdeci">static constexpr const char * _type_key</div><div class="ttdef"><b>Definition:</b> dataflow_matcher.h:59</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1DFPattern_html"><div class="ttname"><a href="classtvm_1_1relay_1_1DFPattern.html">tvm::relay::DFPattern</a></div><div class="ttdoc">Managed reference to dataflow patterns. </div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:50</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html">tvm::runtime::ObjectRef</a></div><div class="ttdoc">Base class of all object reference. </div><div class="ttdef"><b>Definition:</b> object.h:511</div></div>
diff --git a/docs/reference/api/doxygen/dataflow__pattern_8h_source.html b/docs/reference/api/doxygen/dataflow__pattern_8h_source.html
index 3eaa920cd..8ce4ed0a8 100644
--- a/docs/reference/api/doxygen/dataflow__pattern_8h_source.html
+++ b/docs/reference/api/doxygen/dataflow__pattern_8h_source.html
@@ -66,16 +66,16 @@ $(function() {
 <div class="title">dataflow_pattern.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="dataflow__pattern_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * o [...]
+<a href="dataflow__pattern_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * o [...]
 <div class="ttc" id="classtvm_1_1relay_1_1LetPatternNode_html_abadb55373eed317050a1afad4e131edf"><div class="ttname"><a href="classtvm_1_1relay_1_1LetPatternNode.html#abadb55373eed317050a1afad4e131edf">tvm::relay::LetPatternNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:237</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a61dc962beca310c25ef857e51fb3553c"><div class="ttname"><a href="namespacetvm_1_1relay.html#a61dc962beca310c25ef857e51fb3553c">tvm::relay::IsExpr</a></div><div class="ttdeci">DFPattern IsExpr(const Expr &amp;expr)</div><div class="ttdoc">Syntatic Sugar for creating a ExprPattern. </div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1DominatorPatternNode_html_a752951f9926f6011dc4d925fcca44c9a"><div class="ttname"><a href="classtvm_1_1relay_1_1DominatorPatternNode.html#a752951f9926f6011dc4d925fcca44c9a">tvm::relay::DominatorPatternNode::path</a></div><div class="ttdeci">DFPattern path</div><div class="ttdoc">The path. </div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:502</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1DominatorPatternNode_html_a00e08818531d096526c3d4368c0ddeec"><div class="ttname"><a href="classtvm_1_1relay_1_1DominatorPatternNode.html#a00e08818531d096526c3d4368c0ddeec">tvm::relay::DominatorPatternNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:506</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ExprPattern_html"><div class="ttname"><a href="classtvm_1_1relay_1_1ExprPattern.html">tvm::relay::ExprPattern</a></div><div class="ttdoc">A pattern which matches a literal expression. </div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:100</div></div>
+<div class="ttc" id="namespacetvm_html_ac3bf2ef3556c995846dddcd84e5db8a6"><div class="ttname"><a href="namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6">tvm::operator||</a></div><div class="ttdeci">PrimExpr operator||(PrimExpr a, PrimExpr b)</div><div class="ttdoc">or </div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1LetPatternNode_html_aff0c6dec182a3173fe0cb601a5b74ed1"><div class="ttname"><a href="classtvm_1_1relay_1_1LetPatternNode.html#aff0c6dec182a3173fe0cb601a5b74ed1">tvm::relay::LetPatternNode::var</a></div><div class="ttdeci">DFPattern var</div><div class="ttdoc">The variable we bind to. </div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:231</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1DataTypePatternNode_html"><div class="ttname"><a href="classtvm_1_1relay_1_1DataTypePatternNode.html">tvm::relay::DataTypePatternNode</a></div><div class="ttdoc">Pattern for Types. </div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:437</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1TupleGetItemPatternNode_html_aeaf2a6a9367c89561f6f816e7f58624b"><div class="ttname"><a href="classtvm_1_1relay_1_1TupleGetItemPatternNode.html#aeaf2a6a9367c89561f6f816e7f58624b">tvm::relay::TupleGetItemPatternNode::index</a></div><div class="ttdeci">int index</div><div class="ttdoc">which value to get </div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:290</div></div>
-<div class="ttc" id="namespacetvm_html_a002710a4652156a57495e10a09b5d002"><div class="ttname"><a href="namespacetvm.html#a002710a4652156a57495e10a09b5d002">tvm::operator||</a></div><div class="ttdeci">Bool operator||(const Bool &amp;a, bool b)</div><div class="ttdef"><b>Definition:</b> expr.h:379</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1AltPatternNode_html_a5809e714902884904b47e81004c30110"><div class="ttname"><a href="classtvm_1_1relay_1_1AltPatternNode.html#a5809e714902884904b47e81004c30110">tvm::relay::AltPatternNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:338</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ShapePatternNode_html_a77130ba4e4b1b051415a08a6c0148d30"><div class="ttname"><a href="classtvm_1_1relay_1_1ShapePatternNode.html#a77130ba4e4b1b051415a08a6c0148d30">tvm::relay::ShapePatternNode::pattern</a></div><div class="ttdeci">DFPattern pattern</div><div class="ttdoc">The pattern. </div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:411</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1TypePatternNode_html_a7273a1fa7c10a1c4a5f3a4d46bcd463a"><div class="ttname"><a href="classtvm_1_1relay_1_1TypePatternNode.html#a7273a1fa7c10a1c4a5f3a4d46bcd463a">tvm::relay::TypePatternNode::pattern</a></div><div class="ttdeci">DFPattern pattern</div><div class="ttdoc">The pattern. </div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:382</div></div>
@@ -129,7 +129,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1FunctionPatternNode_html_a5e831242707df5d59474816c416abe97"><div class="ttname"><a href="classtvm_1_1relay_1_1FunctionPatternNode.html#a5e831242707df5d59474816c416abe97">tvm::relay::FunctionPatternNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:201</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_af30c02f3a3f37c7963b3af60fb9c72a1"><div class="ttname"><a href="namespacetvm_1_1topi.html#af30c02f3a3f37c7963b3af60fb9c72a1">tvm::topi::shape</a></div><div class="ttdeci">Tensor shape(const Tensor &amp;src, DataType dtype, const std::string name=&quot;T_shape&quot;, const std::string tag=kInjective)</div><div class="ttdoc">Get the shape of input tensor. </div><div class="ttdef"><b>Definition:</b> transform.h:1758</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ConstantPatternNode_html"><div class="ttname"><a href="classtvm_1_1relay_1_1ConstantPatternNode.html">tvm::relay::ConstantPatternNode</a></div><div class="ttdoc">Container for Constant. </div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:138</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1TuplePatternNode_html_abfb4c8c82c843b9bc53808e9096039ce"><div class="ttname"><a href="classtvm_1_1relay_1_1TuplePatternNode.html#abfb4c8c82c843b9bc53808e9096039ce">tvm::relay::TuplePatternNode::fields</a></div><div class="ttdeci">tvm::Array&lt; DFPattern &gt; fields</div><div class="ttdoc">the fields of the tuple </div><div class="ttdef"><b>Definition:</b> dataflow_pattern.h:269</div></div>
diff --git a/docs/reference/api/doxygen/detail_2broadcast_8h_source.html b/docs/reference/api/doxygen/detail_2broadcast_8h_source.html
index c804fdbe2..6fed29a70 100644
--- a/docs/reference/api/doxygen/detail_2broadcast_8h_source.html
+++ b/docs/reference/api/doxygen/detail_2broadcast_8h_source.html
@@ -76,7 +76,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="namespacetvm_html_a0df5ca82d2c566f628ebb2f1e84a3fcb"><div class="ttname"><a href="namespacetvm.html#a0df5ca82d2c566f628ebb2f1e84a3fcb">tvm::max</a></div><div class="ttdeci">PrimExpr max(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">take maximum of two values </div></div>
 <div class="ttc" id="namespacetvm_1_1tir_html_ae8c7db788e840dc1c2ed1f365d5ea829"><div class="ttname"><a href="namespacetvm_1_1tir.html#ae8c7db788e840dc1c2ed1f365d5ea829">tvm::tir::IntImmNode</a></div><div class="ttdeci">tvm::IntImmNode IntImmNode</div><div class="ttdef"><b>Definition:</b> expr.h:49</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_aed3f57cf8d1c3546f075701898c5b70f"><div class="ttname"><a href="namespacetvm_1_1tir.html#aed3f57cf8d1c3546f075701898c5b70f">tvm::tir::make_zero</a></div><div class="ttdeci">PrimExpr make_zero(DataType t, Span span=Span())</div><div class="ttdoc">Make a const zero expr. </div><div class="ttdef"><b>Definition:</b> op.h:1138</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_aed3f57cf8d1c3546f075701898c5b70f"><div class="ttname"><a href="namespacetvm_1_1tir.html#aed3f57cf8d1c3546f075701898c5b70f">tvm::tir::make_zero</a></div><div class="ttdeci">PrimExpr make_zero(DataType t, Span span=Span())</div><div class="ttdoc">Make a const zero expr. </div><div class="ttdef"><b>Definition:</b> op.h:943</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_af30c02f3a3f37c7963b3af60fb9c72a1"><div class="ttname"><a href="namespacetvm_1_1topi.html#af30c02f3a3f37c7963b3af60fb9c72a1">tvm::topi::shape</a></div><div class="ttdeci">Tensor shape(const Tensor &amp;src, DataType dtype, const std::string name=&quot;T_shape&quot;, const std::string tag=kInjective)</div><div class="ttdoc">Get the shape of input tensor. </div><div class="ttdef"><b>Definition:</b> transform.h:1758</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_a6b097149e69ea03fe3b812a3f5f7fcd9"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#a6b097149e69ea03fe3b812a3f5f7fcd9">tvm::runtime::Array::end</a></div><div class="ttdeci">iterator end() const</div><div class="ttdef"><b>Definition:</b> array.h:369</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1Tensor_html"><div class="ttname"><a href="classtvm_1_1te_1_1Tensor.html">tvm::te::Tensor</a></div><div class="ttdoc">Tensor structure representing a possible input, or intermediate computation result. </div><div class="ttdef"><b>Definition:</b> tensor.h:102</div></div>
diff --git a/docs/reference/api/doxygen/detail_2extern_8h_source.html b/docs/reference/api/doxygen/detail_2extern_8h_source.html
index afba1651d..7c784c108 100644
--- a/docs/reference/api/doxygen/detail_2extern_8h_source.html
+++ b/docs/reference/api/doxygen/detail_2extern_8h_source.html
@@ -66,7 +66,7 @@ $(function() {
 <div class="title">extern.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="detail_2extern_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or m [...]
+<a href="detail_2extern_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or m [...]
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="namespacetvm_1_1te_html"><div class="ttname"><a href="namespacetvm_1_1te.html">tvm::te</a></div><div class="ttdoc">Tensor expression language DSL. </div><div class="ttdef"><b>Definition:</b> extracted_task.h:33</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Call_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Call.html">tvm::tir::Call</a></div><div class="ttdoc">Managed reference to CallNode. </div><div class="ttdef"><b>Definition:</b> expr.h:947</div></div>
diff --git a/docs/reference/api/doxygen/dilate_8h_source.html b/docs/reference/api/doxygen/dilate_8h_source.html
index 4ad7644f2..2f17691b0 100644
--- a/docs/reference/api/doxygen/dilate_8h_source.html
+++ b/docs/reference/api/doxygen/dilate_8h_source.html
@@ -67,7 +67,7 @@ $(function() {
 </div><!--header-->
 <div class="contents">
 <a href="dilate_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more cont [...]
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="namespacetvm_1_1te_html"><div class="ttname"><a href="namespacetvm_1_1te.html">tvm::te</a></div><div class="ttdoc">Tensor expression language DSL. </div><div class="ttdef"><b>Definition:</b> extracted_task.h:33</div></div>
 <div class="ttc" id="namespacetvm_html_a353217978feabae3575560bf1586885f"><div class="ttname"><a href="namespacetvm.html#a353217978feabae3575560bf1586885f">tvm::if_then_else</a></div><div class="ttdeci">PrimExpr if_then_else(PrimExpr cond, PrimExpr true_value, PrimExpr false_value, Span span=Span())</div><div class="ttdoc">Conditional expression. </div></div>
diff --git a/docs/reference/api/doxygen/doc_8h_source.html b/docs/reference/api/doxygen/doc_8h_source.html
index 81462359a..eb747ca21 100644
--- a/docs/reference/api/doxygen/doc_8h_source.html
+++ b/docs/reference/api/doxygen/doc_8h_source.html
@@ -79,7 +79,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></div><div class="ttdoc">base class of all object containers. </div><div class="ttdef"><b>Definition:</b> object.h:167</div></div>
 <div class="ttc" id="classtvm_1_1script_1_1printer_1_1LiteralDoc_html_aad7a2c38982b0a9d5f874eaf3e4676ba"><div class="ttname"><a href="classtvm_1_1script_1_1printer_1_1LiteralDoc.html#aad7a2c38982b0a9d5f874eaf3e4676ba">tvm::script::printer::LiteralDoc::Float</a></div><div class="ttdeci">static LiteralDoc Float(double v)</div><div class="ttdoc">Create a LiteralDoc to represent float. </div><div class="ttdef"><b>Definition:</b> doc.h:150</div></div>
 <div class="ttc" id="classtvm_1_1script_1_1printer_1_1LiteralDoc_html_ad7b614de314ca220044ccfd78e4a9640"><div class="ttname"><a href="classtvm_1_1script_1_1printer_1_1LiteralDoc.html#ad7b614de314ca220044ccfd78e4a9640">tvm::script::printer::LiteralDoc::None</a></div><div class="ttdeci">static LiteralDoc None()</div><div class="ttdoc">Create a LiteralDoc to represent None/null/empty value. </div><div class="ttdef"><b>Definition:</b> doc.h:132</div></div>
-<div class="ttc" id="classtvm_1_1FloatImm_html"><div class="ttname"><a href="classtvm_1_1FloatImm.html">tvm::FloatImm</a></div><div class="ttdoc">Managed reference class to FloatImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:350</div></div>
+<div class="ttc" id="classtvm_1_1FloatImm_html"><div class="ttname"><a href="classtvm_1_1FloatImm.html">tvm::FloatImm</a></div><div class="ttdoc">Managed reference class to FloatImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:564</div></div>
 <div class="ttc" id="classtvm_1_1script_1_1printer_1_1ExprDocNode_html_af03dd1b2e3d5695c273e01d4c4d40c33"><div class="ttname"><a href="classtvm_1_1script_1_1printer_1_1ExprDocNode.html#af03dd1b2e3d5695c273e01d4c4d40c33">tvm::script::printer::ExprDocNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> doc.h:73</div></div>
 <div class="ttc" id="classtvm_1_1AttrVisitor_html"><div class="ttname"><a href="classtvm_1_1AttrVisitor.html">tvm::AttrVisitor</a></div><div class="ttdoc">Visitor class to get the attributes of an AST/IR node. The content is going to be called for each fie...</div><div class="ttdef"><b>Definition:</b> reflection.h:52</div></div>
 <div class="ttc" id="classtvm_1_1script_1_1printer_1_1LiteralDoc_html"><div class="ttname"><a href="classtvm_1_1script_1_1printer_1_1LiteralDoc.html">tvm::script::printer::LiteralDoc</a></div><div class="ttdoc">Reference type of LiteralDocNode. </div><div class="ttdef"><b>Definition:</b> doc.h:124</div></div>
@@ -87,7 +87,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1script_1_1printer_1_1DocNode_html"><div class="ttname"><a href="classtvm_1_1script_1_1printer_1_1DocNode.html">tvm::script::printer::DocNode</a></div><div class="ttdoc">The base class of all Doc. </div><div class="ttdef"><b>Definition:</b> doc.h:41</div></div>
 <div class="ttc" id="classtvm_1_1script_1_1printer_1_1LiteralDoc_html_a789d7d73bd4d94612fa2a84c16b26b89"><div class="ttname"><a href="classtvm_1_1script_1_1printer_1_1LiteralDoc.html#a789d7d73bd4d94612fa2a84c16b26b89">tvm::script::printer::LiteralDoc::Str</a></div><div class="ttdeci">static LiteralDoc Str(const String &amp;v)</div><div class="ttdoc">Create a LiteralDoc to represent string. </div><div class="ttdef"><b>Definition:</b> doc.h:156</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_a237a714a6a16e14aa01fa4ac52426551"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#a237a714a6a16e14aa01fa4ac52426551">tvm::runtime::DataType::Float</a></div><div class="ttdeci">static DataType Float(int bits, int lanes=1)</div><div class="ttdoc">Construct an float type. </div><div class="ttdef"><b>Definition:</b> data_type.h:168</div></div>
-<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:304</div></div>
+<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:518</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
 <div class="ttc" id="classtvm_1_1script_1_1printer_1_1DocNode_html_a78585f033948da5d1a4121aa7b969c47"><div class="ttname"><a href="classtvm_1_1script_1_1printer_1_1DocNode.html#a78585f033948da5d1a4121aa7b969c47">tvm::script::printer::DocNode::TVM_DECLARE_BASE_OBJECT_INFO</a></div><div class="ttdeci">TVM_DECLARE_BASE_OBJECT_INFO(DocNode, Object)</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html">tvm::runtime::ObjectRef</a></div><div class="ttdoc">Base class of all object reference. </div><div class="ttdef"><b>Definition:</b> object.h:511</div></div>
diff --git a/docs/reference/api/doxygen/elemwise_8h_source.html b/docs/reference/api/doxygen/elemwise_8h_source.html
index 8ed0b8a38..62a9d4fc6 100644
--- a/docs/reference/api/doxygen/elemwise_8h_source.html
+++ b/docs/reference/api/doxygen/elemwise_8h_source.html
@@ -72,7 +72,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_html_a7ac1dc0d99ce93090a4cdf90ab19d4b8"><div class="ttname"><a href="namespacetvm_1_1topi.html#a7ac1dc0d99ce93090a4cdf90ab19d4b8">tvm::topi::minimum</a></div><div class="ttdeci">tvm::PrimExpr minimum(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:354</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a91313bc00e3b96717f8420181f789300"><div class="ttname"><a href="namespacetvm_1_1topi.html#a91313bc00e3b96717f8420181f789300">tvm::topi::abs</a></div><div class="ttdeci">Tensor abs(const Tensor &amp;x, std::string name=&quot;T_&quot; &quot;abs&quot;, std::string tag=kElementWise)</div><div class="ttdef"><b>Definition:</b> elemwise.h:60</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a08b96a24b96a201fb5069337e22e7336"><div class="ttname"><a href="namespacetvm_1_1topi.html#a08b96a24b96a201fb5069337e22e7336">tvm::topi::isinf</a></div><div class="ttdeci">Tensor isinf(const Tensor &amp;x, std::string name=&quot;T_&quot; &quot;isinf&quot;, std::string tag=kElementWise)</div><div class="ttdef"><b>Definition:</b> elemwise.h:75</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a5c22ce14da6f5589de129861bb06da78"><div class="ttname"><a href="namespacetvm_1_1topi.html#a5c22ce14da6f5589de129861bb06da78">tvm::topi::full</a></div><div class="ttdeci">Tensor full(const Array&lt; PrimExpr &gt; &amp;shape, DataType dtype, const PrimExpr fill_value, std::string name=&quot;T_full&quot;, std::string tag=kElementWise)</div><div class="ttdoc">Creates an operation that fill a tensor with fill_value. </div><div class="ttdef"><b>De [...]
 <div class="ttc" id="namespacetvm_1_1topi_html_a7e7506fced8d921ca7e1a10ae27b1b5b"><div class="ttname"><a href="namespacetvm_1_1topi.html#a7e7506fced8d921ca7e1a10ae27b1b5b">tvm::topi::sqrt</a></div><div class="ttdeci">Tensor sqrt(const Tensor &amp;x, std::string name=&quot;T_&quot; &quot;sqrt&quot;, std::string tag=kElementWise)</div><div class="ttdef"><b>Definition:</b> elemwise.h:52</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
diff --git a/docs/reference/api/doxygen/error_8h_source.html b/docs/reference/api/doxygen/error_8h_source.html
index de8097aa9..f2da438d1 100644
--- a/docs/reference/api/doxygen/error_8h_source.html
+++ b/docs/reference/api/doxygen/error_8h_source.html
@@ -78,7 +78,7 @@ $(function() {
 <div class="ttc" id="structtvm_1_1ErrorBuilder_html_ad40b754d2d8992b65d0bc5b116bd3f71"><div class="ttname"><a href="structtvm_1_1ErrorBuilder.html#ad40b754d2d8992b65d0bc5b116bd3f71">tvm::ErrorBuilder::operator&lt;&lt;</a></div><div class="ttdeci">ErrorBuilder &amp; operator&lt;&lt;(const T &amp;val)</div><div class="ttdef"><b>Definition:</b> error.h:55</div></div>
 <div class="ttc" id="classtvm_1_1CompileError_html_a9964c983cdc48f486a7e72c024bb41a0"><div class="ttname"><a href="classtvm_1_1CompileError.html#a9964c983cdc48f486a7e72c024bb41a0">tvm::CompileError::CompileError</a></div><div class="ttdeci">CompileError(const ErrorBuilder &amp;err)</div><div class="ttdoc">construct error from error builder. </div><div class="ttdef"><b>Definition:</b> error.h:81</div></div>
 <div class="ttc" id="structtvm_1_1runtime_1_1ObjectPtrHash_html"><div class="ttname"><a href="structtvm_1_1runtime_1_1ObjectPtrHash.html">tvm::runtime::ObjectPtrHash</a></div><div class="ttdoc">ObjectRef hash functor. </div><div class="ttdef"><b>Definition:</b> object.h:624</div></div>
-<div class="ttc" id="classtvm_1_1GlobalVar_html"><div class="ttname"><a href="classtvm_1_1GlobalVar.html">tvm::GlobalVar</a></div><div class="ttdoc">Managed reference to GlobalVarNode. </div><div class="ttdef"><b>Definition:</b> expr.h:261</div></div>
+<div class="ttc" id="classtvm_1_1GlobalVar_html"><div class="ttname"><a href="classtvm_1_1GlobalVar.html">tvm::GlobalVar</a></div><div class="ttdoc">Managed reference to GlobalVarNode. </div><div class="ttdef"><b>Definition:</b> expr.h:475</div></div>
 <div class="ttc" id="classtvm_1_1CompileError_html_ae6f85acdf6cea9b673f89ac2d7a7bf9e"><div class="ttname"><a href="classtvm_1_1CompileError.html#ae6f85acdf6cea9b673f89ac2d7a7bf9e">tvm::CompileError::span</a></div><div class="ttdeci">Span span</div><div class="ttdoc">Location of the error. </div><div class="ttdef"><b>Definition:</b> error.h:71</div></div>
 <div class="ttc" id="structtvm_1_1ErrorBuilder_html_a352b0727f48872b7a3cf65e6b8ac0940"><div class="ttname"><a href="structtvm_1_1ErrorBuilder.html#a352b0727f48872b7a3cf65e6b8ac0940">tvm::ErrorBuilder::CompileError</a></div><div class="ttdeci">friend class CompileError</div><div class="ttdef"><b>Definition:</b> error.h:62</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html">tvm::runtime::ObjectRef</a></div><div class="ttdoc">Base class of all object reference. </div><div class="ttdef"><b>Definition:</b> object.h:511</div></div>
diff --git a/docs/reference/api/doxygen/executor_8h_source.html b/docs/reference/api/doxygen/executor_8h_source.html
index 94732c906..dd2860d91 100644
--- a/docs/reference/api/doxygen/executor_8h_source.html
+++ b/docs/reference/api/doxygen/executor_8h_source.html
@@ -69,7 +69,7 @@ $(function() {
 <a href="executor_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more co [...]
 <div class="ttc" id="classtvm_1_1SEqualReducer_html_a62ba4c55928d4886853f9c33f4147340"><div class="ttname"><a href="classtvm_1_1SEqualReducer.html#a62ba4c55928d4886853f9c33f4147340">tvm::SEqualReducer::DefEqual</a></div><div class="ttdeci">bool DefEqual(const ObjectRef &amp;lhs, const ObjectRef &amp;rhs)</div><div class="ttdoc">Reduce condition to comparison of two definitions, where free vars can be mapped. ...</div><div class="ttdef"><b>Definition:</b> structural_equal.h:165</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ExecutorNode_html_aa0ca9c1f174b8038ed23ab9e53bf332f"><div class="ttname"><a href="classtvm_1_1relay_1_1ExecutorNode.html#aa0ca9c1f174b8038ed23ab9e53bf332f">tvm::relay::ExecutorNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> executor.h:110</div></div>
-<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:369</div></div>
+<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:583</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ExecutorNode_html_a7f123ca7dfec3ad6bb0dfd2aecaf468f"><div class="ttname"><a href="classtvm_1_1relay_1_1ExecutorNode.html#a7f123ca7dfec3ad6bb0dfd2aecaf468f">tvm::relay::ExecutorNode::GetAttr</a></div><div class="ttdeci">Optional&lt; TObjectRef &gt; GetAttr(const std::string &amp;attr_key, Optional&lt; TObjectRef &gt; default_value=Optional&lt; TObjectRef &gt;(nullptr)) const</div><div class="ttdoc">Get an attribute. </div><div class="ttdef"><b>Def [...]
 <div class="ttc" id="classtvm_1_1relay_1_1ExecutorNode_html_a501db1a63776671eeb45689e15ccc3c3"><div class="ttname"><a href="classtvm_1_1relay_1_1ExecutorNode.html#a501db1a63776671eeb45689e15ccc3c3">tvm::relay::ExecutorNode::_type_has_method_sequal_reduce</a></div><div class="ttdeci">static constexpr const bool _type_has_method_sequal_reduce</div><div class="ttdef"><b>Definition:</b> executor.h:116</div></div>
 <div class="ttc" id="classtvm_1_1SEqualReducer_html"><div class="ttname"><a href="classtvm_1_1SEqualReducer.html">tvm::SEqualReducer</a></div><div class="ttdoc">A Reducer class to reduce the structural equality result of two objects. </div><div class="ttdef"><b>Definition:</b> structural_equal.h:102</div></div>
diff --git a/docs/reference/api/doxygen/int__set_8h_source.html b/docs/reference/api/doxygen/int__set_8h_source.html
index 2e1461bb1..9649657dc 100644
--- a/docs/reference/api/doxygen/int__set_8h_source.html
+++ b/docs/reference/api/doxygen/int__set_8h_source.html
@@ -78,7 +78,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1arith_html_aca8806e355ad3dd5f1df9c1eca9aac9da5eb70f55e8952b4484fe6c652138ae00"><div class="ttname"><a href="namespacetvm_1_1arith.html#aca8806e355ad3dd5f1df9c1eca9aac9da5eb70f55e8952b4484fe6c652138ae00">tvm::arith::kPositive</a></div><div class="ttdef"><b>Definition:</b> int_set.h:50</div></div>
 <div class="ttc" id="namespacetvm_1_1arith_html_a68a0523bf0384e492ab222d30be9160e"><div class="ttname"><a href="namespacetvm_1_1arith.html#a68a0523bf0384e492ab222d30be9160e">tvm::arith::Union</a></div><div class="ttdeci">IntSet Union(const Array&lt; IntSet &gt; &amp;sets)</div><div class="ttdoc">Create a union set of all sets, possibly relaxed. </div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1IntSetNode_html_a689b913992364a60b3da1bc54ef13170"><div class="ttname"><a href="classtvm_1_1arith_1_1IntSetNode.html#a689b913992364a60b3da1bc54ef13170">tvm::arith::IntSetNode::TVM_DECLARE_BASE_OBJECT_INFO</a></div><div class="ttdeci">TVM_DECLARE_BASE_OBJECT_INFO(IntSetNode, Object)</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1IntSetNode_html"><div class="ttname"><a href="classtvm_1_1arith_1_1IntSetNode.html">tvm::arith::IntSetNode</a></div><div class="ttdoc">Base class of all Integer set containers. represent a set of integers in one dimension. </div><div class="ttdef"><b>Definition:</b> int_set.h:57</div></div>
 <div class="ttc" id="namespacetvm_1_1arith_html_a73160d1f944121ad2ef22205dd496fdc"><div class="ttname"><a href="namespacetvm_1_1arith.html#a73160d1f944121ad2ef22205dd496fdc">tvm::arith::EvalSet</a></div><div class="ttdeci">IntSet EvalSet(PrimExpr e, const Map&lt; IterVar, IntSet &gt; &amp;dom_map)</div><div class="ttdoc">Find an symbolic integer set that contains all possible values of e given the domain of each iteratio...</div></div>
 <div class="ttc" id="namespacetvm_1_1arith_html_a0f148f1a1b469a3819e0fe6633b674e4"><div class="ttname"><a href="namespacetvm_1_1arith.html#a0f148f1a1b469a3819e0fe6633b674e4">tvm::arith::ExprIntSetMap</a></div><div class="ttdeci">std::unordered_map&lt; PrimExpr, IntSet, ObjectPtrHash, ObjectPtrEqual &gt; ExprIntSetMap</div><div class="ttdoc">Map from Expr to IntSet. </div><div class="ttdef"><b>Definition:</b> int_set.h:216</div></div>
diff --git a/docs/reference/api/doxygen/int__solver_8h_source.html b/docs/reference/api/doxygen/int__solver_8h_source.html
index dc9148d36..51789d64f 100644
--- a/docs/reference/api/doxygen/int__solver_8h_source.html
+++ b/docs/reference/api/doxygen/int__solver_8h_source.html
@@ -92,7 +92,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1AttrVisitor_html"><div class="ttname"><a href="classtvm_1_1AttrVisitor.html">tvm::AttrVisitor</a></div><div class="ttdoc">Visitor class to get the attributes of an AST/IR node. The content is going to be called for each fie...</div><div class="ttdef"><b>Definition:</b> reflection.h:52</div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1IntConstraintsTransformNode_html_a8ce159fc6db748e5092fa937de3fde53"><div class="ttname"><a href="classtvm_1_1arith_1_1IntConstraintsTransformNode.html#a8ce159fc6db748e5092fa937de3fde53">tvm::arith::IntConstraintsTransformNode::src</a></div><div class="ttdeci">IntConstraints src</div><div class="ttdef"><b>Definition:</b> int_solver.h:211</div></div>
 <div class="ttc" id="namespacetvm_1_1arith_html_a76835a091e20acfbce65f678114c0291"><div class="ttname"><a href="namespacetvm_1_1arith.html#a76835a091e20acfbce65f678114c0291">tvm::arith::kSimplifyRewriteCanonicalRewrite</a></div><div class="ttdeci">constexpr int kSimplifyRewriteCanonicalRewrite</div><div class="ttdef"><b>Definition:</b> int_solver.h:47</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1IntConstraints_html"><div class="ttname"><a href="classtvm_1_1arith_1_1IntConstraints.html">tvm::arith::IntConstraints</a></div><div class="ttdoc">Managed reference to IntConstraintsNode. </div><div class="ttdef"><b>Definition:</b> int_solver.h:181</div></div>
 <div class="ttc" id="tir_2expr_8h_html"><div class="ttname"><a href="tir_2expr_8h.html">expr.h</a></div><div class="ttdoc">TIR expressions. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
diff --git a/docs/reference/api/doxygen/interpreter_8h_source.html b/docs/reference/api/doxygen/interpreter_8h_source.html
index bb83dcf83..87bf7b447 100644
--- a/docs/reference/api/doxygen/interpreter_8h_source.html
+++ b/docs/reference/api/doxygen/interpreter_8h_source.html
@@ -94,7 +94,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1relay_1_1InterpreterClosureObj_html"><div class="ttname"><a href="classtvm_1_1relay_1_1InterpreterClosureObj.html">tvm::relay::InterpreterClosureObj</a></div><div class="ttdoc">The container type of Closures used by the interpreter. </div><div class="ttdef"><b>Definition:</b> interpreter.h:49</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc.html">tvm::runtime::TypedPackedFunc</a></div><div class="ttdoc">Please refer to TypedPackedFunc&lt;R(Args..)&gt;. </div><div class="ttdef"><b>Definition:</b> packed_func.h:60</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1RefValue_html"><div class="ttname"><a href="classtvm_1_1relay_1_1RefValue.html">tvm::relay::RefValue</a></div><div class="ttdef"><b>Definition:</b> interpreter.h:116</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1InterpreterClosureObj_html_ad316131843a237abf592b009d839c7bb"><div class="ttname"><a href="classtvm_1_1relay_1_1InterpreterClosureObj.html#ad316131843a237abf592b009d839c7bb">tvm::relay::InterpreterClosureObj::_type_key</a></div><div class="ttdeci">static constexpr const char * _type_key</div><div class="ttdef"><b>Definition:</b> interpreter.h:70</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1RefValueObj_html_a33d9d47dac60dde31a80e3d6c433fec8"><div class="ttname"><a href="structtvm_1_1relay_1_1RefValueObj.html#a33d9d47dac60dde31a80e3d6c433fec8">tvm::relay::RefValueObj::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> interpreter.h:110</div></div>
diff --git a/docs/reference/api/doxygen/ir_2adt_8h_source.html b/docs/reference/api/doxygen/ir_2adt_8h_source.html
index b04833bc8..17e601795 100644
--- a/docs/reference/api/doxygen/ir_2adt_8h_source.html
+++ b/docs/reference/api/doxygen/ir_2adt_8h_source.html
@@ -89,14 +89,14 @@ $(function() {
 <div class="ttc" id="classtvm_1_1TypeDataNode_html_a350a23efc88be1def5b93d27ac6fa88b"><div class="ttname"><a href="classtvm_1_1TypeDataNode.html#a350a23efc88be1def5b93d27ac6fa88b">tvm::TypeDataNode::type_vars</a></div><div class="ttdeci">Array&lt; TypeVar &gt; type_vars</div><div class="ttdoc">The type variables (to allow for polymorphism). </div><div class="ttdef"><b>Definition:</b> adt.h:112</div></div>
 <div class="ttc" id="classtvm_1_1TypeDataNode_html_a4b8e6e09856e514b91586223d48fd117"><div class="ttname"><a href="classtvm_1_1TypeDataNode.html#a4b8e6e09856e514b91586223d48fd117">tvm::TypeDataNode::constructors</a></div><div class="ttdeci">Array&lt; Constructor &gt; constructors</div><div class="ttdoc">The constructors. </div><div class="ttdef"><b>Definition:</b> adt.h:114</div></div>
 <div class="ttc" id="runtime_2container_2adt_8h_html"><div class="ttname"><a href="runtime_2container_2adt_8h.html">adt.h</a></div><div class="ttdoc">Runtime ADT container types. </div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html_ae30ca49a8b84288fbc21d5f312f02929"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ae30ca49a8b84288fbc21d5f312f02929">tvm::RelayExprNode::checked_type_</a></div><div class="ttdeci">Type checked_type_</div><div class="ttdoc">Stores the result of type inference(type checking). </div><div class="ttdef"><b>Definition:</b> expr.h:153</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html_ae30ca49a8b84288fbc21d5f312f02929"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ae30ca49a8b84288fbc21d5f312f02929">tvm::RelayExprNode::checked_type_</a></div><div class="ttdeci">Type checked_type_</div><div class="ttdoc">Stores the result of type inference(type checking). </div><div class="ttdef"><b>Definition:</b> expr.h:367</div></div>
 <div class="ttc" id="classtvm_1_1TypeDataNode_html_aa8db517fa4778bf6c3f2106133fba7d5"><div class="ttname"><a href="classtvm_1_1TypeDataNode.html#aa8db517fa4778bf6c3f2106133fba7d5">tvm::TypeDataNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> adt.h:116</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1TypeNode_html"><div class="ttname"><a href="classtvm_1_1TypeNode.html">tvm::TypeNode</a></div><div class="ttdoc">Type is the base type of all types. </div><div class="ttdef"><b>Definition:</b> type.h:74</div></div>
 <div class="ttc" id="classtvm_1_1TypeDataNode_html"><div class="ttname"><a href="classtvm_1_1TypeDataNode.html">tvm::TypeDataNode</a></div><div class="ttdoc">TypeData container node. </div><div class="ttdef"><b>Definition:</b> adt.h:102</div></div>
 <div class="ttc" id="classtvm_1_1ConstructorNode_html_a7137fbec25bcaae585f532221402e892"><div class="ttname"><a href="classtvm_1_1ConstructorNode.html#a7137fbec25bcaae585f532221402e892">tvm::ConstructorNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> adt.h:60</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
 <div class="ttc" id="classtvm_1_1ConstructorNode_html_a396a76459b3a46eefdfcd952c69c5ee4"><div class="ttname"><a href="classtvm_1_1ConstructorNode.html#a396a76459b3a46eefdfcd952c69c5ee4">tvm::ConstructorNode::tag</a></div><div class="ttdeci">int32_t tag</div><div class="ttdoc">Index in the table of constructors (set when the type is registered). </div><div class="ttdef"><b>Definition:</b> adt.h:56</div></div>
 <div class="ttc" id="classtvm_1_1ConstructorNode_html_af21463fd4d36fa74261a95bc7700d2c5"><div class="ttname"><a href="classtvm_1_1ConstructorNode.html#af21463fd4d36fa74261a95bc7700d2c5">tvm::ConstructorNode::TVM_DECLARE_FINAL_OBJECT_INFO</a></div><div class="ttdeci">TVM_DECLARE_FINAL_OBJECT_INFO(ConstructorNode, RelayExprNode)</div></div>
@@ -110,7 +110,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1TypeData_html"><div class="ttname"><a href="classtvm_1_1TypeData.html">tvm::TypeData</a></div><div class="ttdoc">Stores all data for an Algebraic Data Type (ADT). </div><div class="ttdef"><b>Definition:</b> adt.h:149</div></div>
 <div class="ttc" id="classtvm_1_1Type_html"><div class="ttname"><a href="classtvm_1_1Type.html">tvm::Type</a></div><div class="ttdoc">Managed reference to TypeNode. </div><div class="ttdef"><b>Definition:</b> type.h:93</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a6e725a1cb4c83346e261eac7dc7292a8"><div class="ttname"><a href="namespacetvm_1_1relay.html#a6e725a1cb4c83346e261eac7dc7292a8">tvm::relay::TypeData</a></div><div class="ttdeci">tvm::TypeData TypeData</div><div class="ttdef"><b>Definition:</b> adt.h:43</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:145</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:359</div></div>
 <div class="ttc" id="classtvm_1_1SHashReducer_html_a74260485bd50d1bfa52ded457a6a7777"><div class="ttname"><a href="classtvm_1_1SHashReducer.html#a74260485bd50d1bfa52ded457a6a7777">tvm::SHashReducer::DefHash</a></div><div class="ttdeci">void DefHash(const ObjectRef &amp;key) const</div><div class="ttdoc">Push hash of key to the current sequence of hash values. </div><div class="ttdef"><b>Definition:</b> structural_hash.h:179</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
diff --git a/docs/reference/api/doxygen/ir_2attrs_8h_source.html b/docs/reference/api/doxygen/ir_2attrs_8h_source.html
index 4e6bd67c4..6a12c5354 100644
--- a/docs/reference/api/doxygen/ir_2attrs_8h_source.html
+++ b/docs/reference/api/doxygen/ir_2attrs_8h_source.html
@@ -96,7 +96,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_html_a2acb6435cb5299a480bfb81d3d9ce37c"><div class="ttname"><a href="namespacetvm.html#a2acb6435cb5299a480bfb81d3d9ce37c">tvm::WithAttr</a></div><div class="ttdeci">TFunc WithAttr(TFunc input, const std::string &amp;attr_key, ObjectRef attr_value)</div><div class="ttdoc">Copy the function or module, but overrides the attribute value key with the value. </div><div class="ttdef"><b>Definition:</b> attrs.h:347</div></div>
 <div class="ttc" id="structural__equal_8h_html"><div class="ttname"><a href="structural__equal_8h.html">structural_equal.h</a></div><div class="ttdoc">Structural equality comparison. </div></div>
 <div class="ttc" id="structtvm_1_1detail_1_1AttrInitEntry_html_af07c4a3a8f4663ac03ae238ab7b9d791"><div class="ttname"><a href="structtvm_1_1detail_1_1AttrInitEntry.html#af07c4a3a8f4663ac03ae238ab7b9d791">tvm::detail::AttrInitEntry::AttrInitEntry</a></div><div class="ttdeci">AttrInitEntry(AttrInitEntry &amp;&amp;other)</div><div class="ttdef"><b>Definition:</b> attrs.h:520</div></div>
-<div class="ttc" id="classtvm_1_1FloatImmNode_html"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html">tvm::FloatImmNode</a></div><div class="ttdoc">Constant floating point literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:321</div></div>
+<div class="ttc" id="classtvm_1_1FloatImmNode_html"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html">tvm::FloatImmNode</a></div><div class="ttdoc">Constant floating point literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:535</div></div>
 <div class="ttc" id="namespacetvm_1_1detail_html_a2565d82719660de2ab39178c917d6628"><div class="ttname"><a href="namespacetvm_1_1detail.html#a2565d82719660de2ab39178c917d6628">tvm::detail::SetValue&lt; double &gt;</a></div><div class="ttdeci">void SetValue&lt; double &gt;(double *ptr, const TVMArgValue &amp;val)</div><div class="ttdef"><b>Definition:</b> attrs.h:606</div></div>
 <div class="ttc" id="classtvm_1_1detail_1_1AttrExistVisitor_html_ac6ae7aa3d30f25a953810bcc0d0a938f"><div class="ttname"><a href="classtvm_1_1detail_1_1AttrExistVisitor.html#ac6ae7aa3d30f25a953810bcc0d0a938f">tvm::detail::AttrExistVisitor::exist_</a></div><div class="ttdeci">bool exist_</div><div class="ttdef"><b>Definition:</b> attrs.h:775</div></div>
 <div class="ttc" id="classtvm_1_1detail_1_1AttrDocVisitor_html"><div class="ttname"><a href="classtvm_1_1detail_1_1AttrDocVisitor.html">tvm::detail::AttrDocVisitor</a></div><div class="ttdef"><b>Definition:</b> attrs.h:758</div></div>
@@ -117,7 +117,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1detail_1_1AttrsSEqualVisitor_html_ac67ceda6a413da78e61fa91ca61fcf26"><div class="ttname"><a href="classtvm_1_1detail_1_1AttrsSEqualVisitor.html#ac67ceda6a413da78e61fa91ca61fcf26">tvm::detail::AttrsSEqualVisitor::AttrsSEqualVisitor</a></div><div class="ttdeci">AttrsSEqualVisitor(const Object *lhs, const Object *rhs, const SEqualReducer &amp;equal)</div><div class="ttdef"><b>Definition:</b> attrs.h:467</div></div>
 <div class="ttc" id="classtvm_1_1DictAttrsNode_html"><div class="ttname"><a href="classtvm_1_1DictAttrsNode.html">tvm::DictAttrsNode</a></div><div class="ttdoc">Specialized attribute type that is backed by a map. The DictAttrsNode implements the Attrs behavior...</div><div class="ttdef"><b>Definition:</b> attrs.h:201</div></div>
 <div class="ttc" id="classtvm_1_1detail_1_1AttrDocVisitor_html_a1091752fc7d78b471b034877ad9344b3"><div class="ttname"><a href="classtvm_1_1detail_1_1AttrDocVisitor.html#a1091752fc7d78b471b034877ad9344b3">tvm::detail::AttrDocVisitor::operator()</a></div><div class="ttdeci">AttrDocEntry operator()(const char *key, T *v)</div><div class="ttdef"><b>Definition:</b> attrs.h:761</div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:275</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:489</div></div>
 <div class="ttc" id="classtvm_1_1StructuralEqual_html"><div class="ttname"><a href="classtvm_1_1StructuralEqual.html">tvm::StructuralEqual</a></div><div class="ttdoc">Content-aware structural equality comparator for objects. </div><div class="ttdef"><b>Definition:</b> structural_equal.h:81</div></div>
 <div class="ttc" id="classtvm_1_1DictAttrs_html_af021c0a1d934693d21cb93d4bfd60aca"><div class="ttname"><a href="classtvm_1_1DictAttrs.html#af021c0a1d934693d21cb93d4bfd60aca">tvm::DictAttrs::GetAttr</a></div><div class="ttdeci">Optional&lt; TObjectRef &gt; GetAttr(const std::string &amp;attr_key, TObjectRef default_value) const</div><div class="ttdef"><b>Definition:</b> attrs.h:276</div></div>
 <div class="ttc" id="structtvm_1_1detail_1_1TypeName_html"><div class="ttname"><a href="structtvm_1_1detail_1_1TypeName.html">tvm::detail::TypeName</a></div><div class="ttdoc">Helper struct to get the type name known to tvm. </div><div class="ttdef"><b>Definition:</b> attrs.h:685</div></div>
@@ -142,7 +142,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1detail_1_1AttrDocEntry_html_aec039b071d826ab164c5abe123aefaa3"><div class="ttname"><a href="classtvm_1_1detail_1_1AttrDocEntry.html#aec039b071d826ab164c5abe123aefaa3">tvm::detail::AttrDocEntry::set_upper_bound</a></div><div class="ttdeci">TSelf &amp; set_upper_bound(DMLC_ATTRIBUTE_UNUSED T end)</div><div class="ttdef"><b>Definition:</b> attrs.h:750</div></div>
 <div class="ttc" id="classtvm_1_1BaseAttrsNode_html_a225581a40231b2de219da30fced428a2"><div class="ttname"><a href="classtvm_1_1BaseAttrsNode.html#a225581a40231b2de219da30fced428a2">tvm::BaseAttrsNode::~BaseAttrsNode</a></div><div class="ttdeci">virtual ~BaseAttrsNode()</div><div class="ttdoc">virtual destructor </div><div class="ttdef"><b>Definition:</b> attrs.h:144</div></div>
 <div class="ttc" id="namespacetvm_html_a7e2bc626db8be997b1562c79df3d9e11"><div class="ttname"><a href="namespacetvm.html#a7e2bc626db8be997b1562c79df3d9e11">tvm::WithoutAttr</a></div><div class="ttdeci">TFunc WithoutAttr(TFunc input, const std::string &amp;attr_key)</div><div class="ttdoc">Copy the function or module, but removes the specified attribute. </div><div class="ttdef"><b>Definition:</b> attrs.h:412</div></div>
-<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:304</div></div>
+<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:518</div></div>
 <div class="ttc" id="structtvm_1_1detail_1_1AttrNopEntry_html_a370e92bafbada9ba805a52e72881f98b"><div class="ttname"><a href="structtvm_1_1detail_1_1AttrNopEntry.html#a370e92bafbada9ba805a52e72881f98b">tvm::detail::AttrNopEntry::set_default</a></div><div class="ttdeci">TSelf &amp; set_default(DMLC_ATTRIBUTE_UNUSED const T &amp;value)</div><div class="ttdef"><b>Definition:</b> attrs.h:436</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
 <div class="ttc" id="classtvm_1_1detail_1_1AttrInitVisitor_html_ac3c800c9249fee195db2a5fa473fe960"><div class="ttname"><a href="classtvm_1_1detail_1_1AttrInitVisitor.html#ac3c800c9249fee195db2a5fa473fe960">tvm::detail::AttrInitVisitor::AttrInitVisitor</a></div><div class="ttdeci">AttrInitVisitor(const char *type_key, FFind ffind)</div><div class="ttdef"><b>Definition:</b> attrs.h:646</div></div>
diff --git a/docs/reference/api/doxygen/ir_2expr_8h.html b/docs/reference/api/doxygen/ir_2expr_8h.html
index f78fe6056..00ffaf139 100644
--- a/docs/reference/api/doxygen/ir_2expr_8h.html
+++ b/docs/reference/api/doxygen/ir_2expr_8h.html
@@ -164,6 +164,66 @@ Namespaces</h2></td></tr>
 </table><table class="memberdecls">
 <tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="func-members"></a>
 Functions</h2></td></tr>
+<tr class="memitem:af246f441d4ac21b110185b77240b2dcc"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#af246f441d4ac21b110185b77240b2dcc">tvm::operator+</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:af246f441d4ac21b110185b77240b2dcc"><td class="mdescLeft">&#160;</td><td class="mdescRight">add operator  <a href="namespacetvm.html#af246f441d4ac21b110185b77240b2dcc">More...</a><br /></td></tr>
+<tr class="separator:af246f441d4ac21b110185b77240b2dcc"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:abde487c0197942c4ebb1b47277b89dac"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abde487c0197942c4ebb1b47277b89dac">tvm::operator-</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:abde487c0197942c4ebb1b47277b89dac"><td class="mdescLeft">&#160;</td><td class="mdescRight">subtraction operator  <a href="namespacetvm.html#abde487c0197942c4ebb1b47277b89dac">More...</a><br /></td></tr>
+<tr class="separator:abde487c0197942c4ebb1b47277b89dac"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:abc417454badf61b154d6a8d87cd8f171"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abc417454badf61b154d6a8d87cd8f171">tvm::operator-</a> (PrimExpr a)</td></tr>
+<tr class="memdesc:abc417454badf61b154d6a8d87cd8f171"><td class="mdescLeft">&#160;</td><td class="mdescRight">negation.  <a href="namespacetvm.html#abc417454badf61b154d6a8d87cd8f171">More...</a><br /></td></tr>
+<tr class="separator:abc417454badf61b154d6a8d87cd8f171"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a5c5034de2993b9130b7bd9d593a11bb5"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a5c5034de2993b9130b7bd9d593a11bb5">tvm::operator*</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a5c5034de2993b9130b7bd9d593a11bb5"><td class="mdescLeft">&#160;</td><td class="mdescRight">multiplication operator  <a href="namespacetvm.html#a5c5034de2993b9130b7bd9d593a11bb5">More...</a><br /></td></tr>
+<tr class="separator:a5c5034de2993b9130b7bd9d593a11bb5"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a18256ba1213ce5ff3cf8037a314354b7"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a18256ba1213ce5ff3cf8037a314354b7">tvm::operator/</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a18256ba1213ce5ff3cf8037a314354b7"><td class="mdescLeft">&#160;</td><td class="mdescRight">division operator  <a href="namespacetvm.html#a18256ba1213ce5ff3cf8037a314354b7">More...</a><br /></td></tr>
+<tr class="separator:a18256ba1213ce5ff3cf8037a314354b7"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:af682776c3609284f1bc3ea436e21a67a"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#af682776c3609284f1bc3ea436e21a67a">tvm::operator&lt;&lt;</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:af682776c3609284f1bc3ea436e21a67a"><td class="mdescLeft">&#160;</td><td class="mdescRight">left shift operator  <a href="namespacetvm.html#af682776c3609284f1bc3ea436e21a67a">More...</a><br /></td></tr>
+<tr class="separator:af682776c3609284f1bc3ea436e21a67a"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41">tvm::operator&gt;&gt;</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="mdescLeft">&#160;</td><td class="mdescRight">right shift operator  <a href="namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41">More...</a><br /></td></tr>
+<tr class="separator:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ad93d00f7b080dc3f905f5c34c170a041"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041">tvm::operator&gt;</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:ad93d00f7b080dc3f905f5c34c170a041"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater  <a href="namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041">More...</a><br /></td></tr>
+<tr class="separator:ad93d00f7b080dc3f905f5c34c170a041"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a5530417da455bd46f5dc55f27d69bcdf"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf">tvm::operator&gt;=</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a5530417da455bd46f5dc55f27d69bcdf"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater_equal  <a href="namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf">More...</a><br /></td></tr>
+<tr class="separator:a5530417da455bd46f5dc55f27d69bcdf"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a1f98476c3a413f6cdfc7b7e490f3221b">tvm::operator&lt;</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="mdescLeft">&#160;</td><td class="mdescRight">less  <a href="namespacetvm.html#a1f98476c3a413f6cdfc7b7e490f3221b">More...</a><br /></td></tr>
+<tr class="separator:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a598f8139c469abc4066dbdd0a0a0845d"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d">tvm::operator&lt;=</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a598f8139c469abc4066dbdd0a0a0845d"><td class="mdescLeft">&#160;</td><td class="mdescRight">less_equal  <a href="namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d">More...</a><br /></td></tr>
+<tr class="separator:a598f8139c469abc4066dbdd0a0a0845d"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a2ea3b45c96d3980227e418f7158ce5c3"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a2ea3b45c96d3980227e418f7158ce5c3">tvm::operator==</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a2ea3b45c96d3980227e418f7158ce5c3"><td class="mdescLeft">&#160;</td><td class="mdescRight">equal  <a href="namespacetvm.html#a2ea3b45c96d3980227e418f7158ce5c3">More...</a><br /></td></tr>
+<tr class="separator:a2ea3b45c96d3980227e418f7158ce5c3"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a03983cf66713724c138f9697bb8e0e97"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a03983cf66713724c138f9697bb8e0e97">tvm::operator!=</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a03983cf66713724c138f9697bb8e0e97"><td class="mdescLeft">&#160;</td><td class="mdescRight">not_equal  <a href="namespacetvm.html#a03983cf66713724c138f9697bb8e0e97">More...</a><br /></td></tr>
+<tr class="separator:a03983cf66713724c138f9697bb8e0e97"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a7579d33e0aac9600dec46264a3f1edb8"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8">tvm::operator &amp;&amp;</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a7579d33e0aac9600dec46264a3f1edb8"><td class="mdescLeft">&#160;</td><td class="mdescRight">and  <a href="namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8">More...</a><br /></td></tr>
+<tr class="separator:a7579d33e0aac9600dec46264a3f1edb8"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6">tvm::operator||</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="mdescLeft">&#160;</td><td class="mdescRight">or  <a href="namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6">More...</a><br /></td></tr>
+<tr class="separator:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ab354bf1270121abea71fade83f13b0b0"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ab354bf1270121abea71fade83f13b0b0">tvm::operator!</a> (PrimExpr a)</td></tr>
+<tr class="memdesc:ab354bf1270121abea71fade83f13b0b0"><td class="mdescLeft">&#160;</td><td class="mdescRight">not  <a href="namespacetvm.html#ab354bf1270121abea71fade83f13b0b0">More...</a><br /></td></tr>
+<tr class="separator:ab354bf1270121abea71fade83f13b0b0"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a2a1269a38e7e3621eb2906a47157106a"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a2a1269a38e7e3621eb2906a47157106a">tvm::operator &amp;</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a2a1269a38e7e3621eb2906a47157106a"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise and of two values  <a href="namespacetvm.html#a2a1269a38e7e3621eb2906a47157106a">More...</a><br /></td></tr>
+<tr class="separator:a2a1269a38e7e3621eb2906a47157106a"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a236d9aae385e6697874f75e4c8a69f8d"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d">tvm::operator|</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:a236d9aae385e6697874f75e4c8a69f8d"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise or of two values  <a href="namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d">More...</a><br /></td></tr>
+<tr class="separator:a236d9aae385e6697874f75e4c8a69f8d"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f">tvm::operator^</a> (PrimExpr a, PrimExpr b)</td></tr>
+<tr class="memdesc:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise xor of two values  <a href="namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f">More...</a><br /></td></tr>
+<tr class="separator:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a354b9954ff25dd819a51d856fdd38827"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a354b9954ff25dd819a51d856fdd38827">tvm::operator~</a> (PrimExpr a)</td></tr>
+<tr class="memdesc:a354b9954ff25dd819a51d856fdd38827"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise negation of a value  <a href="namespacetvm.html#a354b9954ff25dd819a51d856fdd38827">More...</a><br /></td></tr>
+<tr class="separator:a354b9954ff25dd819a51d856fdd38827"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a002710a4652156a57495e10a09b5d002"><td class="memItemLeft" align="right" valign="top">Bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a002710a4652156a57495e10a09b5d002">tvm::operator||</a> (const Bool &amp;a, bool b)</td></tr>
 <tr class="separator:a002710a4652156a57495e10a09b5d002"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a4c8c1c1c248859ce0d20f614e18a9524"><td class="memItemLeft" align="right" valign="top">Bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a4c8c1c1c248859ce0d20f614e18a9524">tvm::operator||</a> (bool a, const Bool &amp;b)</td></tr>
diff --git a/docs/reference/api/doxygen/ir_2expr_8h_source.html b/docs/reference/api/doxygen/ir_2expr_8h_source.html
index f08d2bb83..76a2ceaf1 100644
--- a/docs/reference/api/doxygen/ir_2expr_8h_source.html
+++ b/docs/reference/api/doxygen/ir_2expr_8h_source.html
@@ -66,106 +66,122 @@ $(function() {
 <div class="title">expr.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="ir_2expr_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more co [...]
+<a href="ir_2expr_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more co [...]
+<div class="ttc" id="namespacetvm_html_a03983cf66713724c138f9697bb8e0e97"><div class="ttname"><a href="namespacetvm.html#a03983cf66713724c138f9697bb8e0e97">tvm::operator!=</a></div><div class="ttdeci">PrimExpr operator!=(PrimExpr a, PrimExpr b)</div><div class="ttdoc">not_equal </div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_af40ca6124bc2e88f2323eeb79d326cc0"><div class="ttname"><a href="namespacetvm_1_1relay.html#af40ca6124bc2e88f2323eeb79d326cc0">tvm::relay::Span</a></div><div class="ttdeci">tvm::Span Span</div><div class="ttdef"><b>Definition:</b> base.h:65</div></div>
 <div class="ttc" id="classtvm_1_1BaseExprNode_html_a0cc4e898dbc3b52d785fc3e515663c61"><div class="ttname"><a href="classtvm_1_1BaseExprNode.html#a0cc4e898dbc3b52d785fc3e515663c61">tvm::BaseExprNode::_type_key</a></div><div class="ttdeci">static constexpr const char * _type_key</div><div class="ttdef"><b>Definition:</b> expr.h:57</div></div>
 <div class="ttc" id="classtvm_1_1SHashReducer_html_a6cab7ab9b4adbab253001be6d3b4cdfe"><div class="ttname"><a href="classtvm_1_1SHashReducer.html#a6cab7ab9b4adbab253001be6d3b4cdfe">tvm::SHashReducer::FreeVarHashImpl</a></div><div class="ttdeci">void FreeVarHashImpl(const runtime::Object *var) const</div><div class="ttdoc">Implementation for hash for a free var. </div><div class="ttdef"><b>Definition:</b> structural_hash.h:185</div></div>
-<div class="ttc" id="classtvm_1_1FloatImmNode_html_a88ce961273d224831b187d8e9f149e08"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html#a88ce961273d224831b187d8e9f149e08">tvm::FloatImmNode::value</a></div><div class="ttdeci">double value</div><div class="ttdoc">The constant value content. </div><div class="ttdef"><b>Definition:</b> expr.h:324</div></div>
-<div class="ttc" id="classtvm_1_1RangeNode_html_a43d2fb12bb61cf05936a1972d0158b49"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a43d2fb12bb61cf05936a1972d0158b49">tvm::RangeNode::min</a></div><div class="ttdeci">PrimExpr min</div><div class="ttdoc">beginning of the node </div><div class="ttdef"><b>Definition:</b> expr.h:465</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html_aca36f6077174fd8361d57c71bac371e2"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#aca36f6077174fd8361d57c71bac371e2">tvm::RelayExprNode::checked_type</a></div><div class="ttdeci">const Type &amp; checked_type() const</div><div class="ttdef"><b>Definition:</b> expr.h:522</div></div>
-<div class="ttc" id="namespacetvm_html_a242b37bc39f3fc56d29e36f916cc1483"><div class="ttname"><a href="namespacetvm.html#a242b37bc39f3fc56d29e36f916cc1483">tvm::operator &amp;&amp;</a></div><div class="ttdeci">Bool operator &amp;&amp;(const Bool &amp;a, bool b)</div><div class="ttdef"><b>Definition:</b> expr.h:384</div></div>
+<div class="ttc" id="namespacetvm_html_a1f98476c3a413f6cdfc7b7e490f3221b"><div class="ttname"><a href="namespacetvm.html#a1f98476c3a413f6cdfc7b7e490f3221b">tvm::operator&lt;</a></div><div class="ttdeci">PrimExpr operator&lt;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">less </div></div>
+<div class="ttc" id="classtvm_1_1FloatImmNode_html_a88ce961273d224831b187d8e9f149e08"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html#a88ce961273d224831b187d8e9f149e08">tvm::FloatImmNode::value</a></div><div class="ttdeci">double value</div><div class="ttdoc">The constant value content. </div><div class="ttdef"><b>Definition:</b> expr.h:538</div></div>
+<div class="ttc" id="classtvm_1_1RangeNode_html_a43d2fb12bb61cf05936a1972d0158b49"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a43d2fb12bb61cf05936a1972d0158b49">tvm::RangeNode::min</a></div><div class="ttdeci">PrimExpr min</div><div class="ttdoc">beginning of the node </div><div class="ttdef"><b>Definition:</b> expr.h:679</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html_aca36f6077174fd8361d57c71bac371e2"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#aca36f6077174fd8361d57c71bac371e2">tvm::RelayExprNode::checked_type</a></div><div class="ttdeci">const Type &amp; checked_type() const</div><div class="ttdef"><b>Definition:</b> expr.h:736</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectPtr_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectPtr.html">tvm::runtime::ObjectPtr</a></div><div class="ttdoc">A custom smart pointer for Object. </div><div class="ttdef"><b>Definition:</b> object.h:358</div></div>
-<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:369</div></div>
+<div class="ttc" id="namespacetvm_html_ac3bf2ef3556c995846dddcd84e5db8a6"><div class="ttname"><a href="namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6">tvm::operator||</a></div><div class="ttdeci">PrimExpr operator||(PrimExpr a, PrimExpr b)</div><div class="ttdoc">or </div></div>
+<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:583</div></div>
 <div class="ttc" id="node_8h_html"><div class="ttname"><a href="node_8h.html">node.h</a></div><div class="ttdoc">Definitions and helper macros for IR/AST nodes. </div></div>
-<div class="ttc" id="namespacetvm_html_a002710a4652156a57495e10a09b5d002"><div class="ttname"><a href="namespacetvm.html#a002710a4652156a57495e10a09b5d002">tvm::operator||</a></div><div class="ttdeci">Bool operator||(const Bool &amp;a, bool b)</div><div class="ttdef"><b>Definition:</b> expr.h:379</div></div>
 <div class="ttc" id="string_8h_html"><div class="ttname"><a href="string_8h.html">string.h</a></div><div class="ttdoc">Runtime String container types. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html">tvm::runtime::TVMPODValue_</a></div><div class="ttdoc">Internal base class to handle conversion to POD values. </div><div class="ttdef"><b>Definition:</b> packed_func.h:541</div></div>
-<div class="ttc" id="classtvm_1_1RangeNode_html_a7903313f813a5cb7660a5448e2eeb2b4"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a7903313f813a5cb7660a5448e2eeb2b4">tvm::RangeNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:485</div></div>
+<div class="ttc" id="classtvm_1_1RangeNode_html_a7903313f813a5cb7660a5448e2eeb2b4"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a7903313f813a5cb7660a5448e2eeb2b4">tvm::RangeNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:699</div></div>
 <div class="ttc" id="classtvm_1_1SEqualReducer_html_ad9c0e2985eb9d82eecc67ea0f008154c"><div class="ttname"><a href="classtvm_1_1SEqualReducer.html#ad9c0e2985eb9d82eecc67ea0f008154c">tvm::SEqualReducer::FreeVarEqualImpl</a></div><div class="ttdeci">bool FreeVarEqualImpl(const runtime::Object *lhs, const runtime::Object *rhs) const</div><div class="ttdoc">Implementation for equality rule of var type objects(e.g. TypeVar, tir::Var). </div><div class="ttdef"><b>Definition:</b> structural_equ [...]
 <div class="ttc" id="classtvm_1_1SEqualReducer_html"><div class="ttname"><a href="classtvm_1_1SEqualReducer.html">tvm::SEqualReducer</a></div><div class="ttdoc">A Reducer class to reduce the structural equality result of two objects. </div><div class="ttdef"><b>Definition:</b> structural_equal.h:102</div></div>
 <div class="ttc" id="classtvm_1_1BaseExprNode_html_a13d7d1f1e6f790951caf200cd59c3620"><div class="ttname"><a href="classtvm_1_1BaseExprNode.html#a13d7d1f1e6f790951caf200cd59c3620">tvm::BaseExprNode::_type_has_method_shash_reduce</a></div><div class="ttdeci">static constexpr const bool _type_has_method_shash_reduce</div><div class="ttdef"><b>Definition:</b> expr.h:59</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_a2d3969d98441b5b2ee5d8a986a56c410"><div class="ttname"><a href="classtvm_1_1Integer.html#a2d3969d98441b5b2ee5d8a986a56c410">tvm::Integer::Integer</a></div><div class="ttdeci">Integer()</div><div class="ttdef"><b>Definition:</b> expr.h:406</div></div>
-<div class="ttc" id="classtvm_1_1GlobalVarNode_html_ab82974132026f07d89afcf409a2ca616"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html#ab82974132026f07d89afcf409a2ca616">tvm::GlobalVarNode::name_hint</a></div><div class="ttdeci">String name_hint</div><div class="ttdoc">The name of the variable, this only acts as a hint. </div><div class="ttdef"><b>Definition:</b> expr.h:234</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_a2d3969d98441b5b2ee5d8a986a56c410"><div class="ttname"><a href="classtvm_1_1Integer.html#a2d3969d98441b5b2ee5d8a986a56c410">tvm::Integer::Integer</a></div><div class="ttdeci">Integer()</div><div class="ttdef"><b>Definition:</b> expr.h:620</div></div>
+<div class="ttc" id="classtvm_1_1GlobalVarNode_html_ab82974132026f07d89afcf409a2ca616"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html#ab82974132026f07d89afcf409a2ca616">tvm::GlobalVarNode::name_hint</a></div><div class="ttdeci">String name_hint</div><div class="ttdoc">The name of the variable, this only acts as a hint. </div><div class="ttdef"><b>Definition:</b> expr.h:448</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
-<div class="ttc" id="classtvm_1_1RangeNode_html_aee167e954efc66eec2b2fecfcfc7e3d5"><div class="ttname"><a href="classtvm_1_1RangeNode.html#aee167e954efc66eec2b2fecfcfc7e3d5">tvm::RangeNode::span</a></div><div class="ttdeci">Span span</div><div class="ttdoc">the location of this range in the source </div><div class="ttdef"><b>Definition:</b> expr.h:469</div></div>
+<div class="ttc" id="classtvm_1_1RangeNode_html_aee167e954efc66eec2b2fecfcfc7e3d5"><div class="ttname"><a href="classtvm_1_1RangeNode.html#aee167e954efc66eec2b2fecfcfc7e3d5">tvm::RangeNode::span</a></div><div class="ttdeci">Span span</div><div class="ttdoc">the location of this range in the source </div><div class="ttdef"><b>Definition:</b> expr.h:683</div></div>
 <div class="ttc" id="classtvm_1_1SHashReducer_html"><div class="ttname"><a href="classtvm_1_1SHashReducer.html">tvm::SHashReducer</a></div><div class="ttdoc">A Reducer class to reduce the structural hash value. </div><div class="ttdef"><b>Definition:</b> structural_hash.h:102</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793a72d85fbd104ae87b054f93ddd8e1476b"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793a72d85fbd104ae87b054f93ddd8e1476b">kTVMArgInt</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:111</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_a5b9ad6d47c7c6df5a066d58f6ba65f8e"><div class="ttname"><a href="classtvm_1_1Integer.html#a5b9ad6d47c7c6df5a066d58f6ba65f8e">tvm::Integer::operator==</a></div><div class="ttdeci">Bool operator==(int other) const</div><div class="ttdef"><b>Definition:</b> expr.h:446</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_a5b9ad6d47c7c6df5a066d58f6ba65f8e"><div class="ttname"><a href="classtvm_1_1Integer.html#a5b9ad6d47c7c6df5a066d58f6ba65f8e">tvm::Integer::operator==</a></div><div class="ttdeci">Bool operator==(int other) const</div><div class="ttdef"><b>Definition:</b> expr.h:660</div></div>
 <div class="ttc" id="namespacetvm_html_a1c4f14382b85bcfa57d9a3460db2354a"><div class="ttname"><a href="namespacetvm.html#a1c4f14382b85bcfa57d9a3460db2354a">tvm::equal</a></div><div class="ttdeci">PrimExpr equal(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">equal </div></div>
-<div class="ttc" id="classtvm_1_1FloatImmNode_html"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html">tvm::FloatImmNode</a></div><div class="ttdoc">Constant floating point literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:321</div></div>
+<div class="ttc" id="classtvm_1_1FloatImmNode_html"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html">tvm::FloatImmNode</a></div><div class="ttdoc">Constant floating point literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:535</div></div>
 <div class="ttc" id="namespacestd_html"><div class="ttname"><a href="namespacestd.html">std</a></div><div class="ttdef"><b>Definition:</b> loop_state.h:456</div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html_afcc3cda21d5cb01b0624a9c572245ee5"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#afcc3cda21d5cb01b0624a9c572245ee5">tvm::IntImmNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const IntImmNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:286</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html_afcc3cda21d5cb01b0624a9c572245ee5"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#afcc3cda21d5cb01b0624a9c572245ee5">tvm::IntImmNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const IntImmNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:500</div></div>
 <div class="ttc" id="classtvm_1_1PrimExpr_html_a18a8a0fef87b74b18fc254f49465cc56"><div class="ttname"><a href="classtvm_1_1PrimExpr.html#a18a8a0fef87b74b18fc254f49465cc56">tvm::PrimExpr::dtype</a></div><div class="ttdeci">DataType dtype() const</div><div class="ttdef"><b>Definition:</b> expr.h:126</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_afc4e496d3fdfd643d04e9d75c0977021"><div class="ttname"><a href="classtvm_1_1Integer.html#afc4e496d3fdfd643d04e9d75c0977021">tvm::Integer::Integer</a></div><div class="ttdeci">Integer(int value, Span span=Span())</div><div class="ttdoc">Construct integer from int value. </div><div class="ttdef"><b>Definition:</b> expr.h:414</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_afc4e496d3fdfd643d04e9d75c0977021"><div class="ttname"><a href="classtvm_1_1Integer.html#afc4e496d3fdfd643d04e9d75c0977021">tvm::Integer::Integer</a></div><div class="ttdeci">Integer(int value, Span span=Span())</div><div class="ttdoc">Construct integer from int value. </div><div class="ttdef"><b>Definition:</b> expr.h:628</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html">tvm::runtime::Object</a></div><div class="ttdoc">base class of all object containers. </div><div class="ttdef"><b>Definition:</b> object.h:167</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_ad538a2ae6f636b3ce38fb4162b1c2549"><div class="ttname"><a href="classtvm_1_1Integer.html#ad538a2ae6f636b3ce38fb4162b1c2549">tvm::Integer::operator=</a></div><div class="ttdeci">Integer &amp; operator=(const IntImm &amp;other)</div><div class="ttdoc">Assign an expression to integer. </div><div class="ttdef"><b>Definition:</b> expr.h:434</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_a962f080ada680921fbfff0846553ec33"><div class="ttname"><a href="classtvm_1_1Integer.html#a962f080ada680921fbfff0846553ec33">tvm::Integer::Integer</a></div><div class="ttdeci">Integer(IntImm other)</div><div class="ttdoc">Construct integer from int imm. </div><div class="ttdef"><b>Definition:</b> expr.h:419</div></div>
+<div class="ttc" id="namespacetvm_html_abde487c0197942c4ebb1b47277b89dac"><div class="ttname"><a href="namespacetvm.html#abde487c0197942c4ebb1b47277b89dac">tvm::operator-</a></div><div class="ttdeci">PrimExpr operator-(PrimExpr a, PrimExpr b)</div><div class="ttdoc">subtraction operator </div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_ad538a2ae6f636b3ce38fb4162b1c2549"><div class="ttname"><a href="classtvm_1_1Integer.html#ad538a2ae6f636b3ce38fb4162b1c2549">tvm::Integer::operator=</a></div><div class="ttdeci">Integer &amp; operator=(const IntImm &amp;other)</div><div class="ttdoc">Assign an expression to integer. </div><div class="ttdef"><b>Definition:</b> expr.h:648</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_a962f080ada680921fbfff0846553ec33"><div class="ttname"><a href="classtvm_1_1Integer.html#a962f080ada680921fbfff0846553ec33">tvm::Integer::Integer</a></div><div class="ttdeci">Integer(IntImm other)</div><div class="ttdoc">Construct integer from int imm. </div><div class="ttdef"><b>Definition:</b> expr.h:633</div></div>
 <div class="ttc" id="classtvm_1_1BaseExpr_html"><div class="ttname"><a href="classtvm_1_1BaseExpr.html">tvm::BaseExpr</a></div><div class="ttdoc">Managed reference to BaseExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:68</div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:275</div></div>
-<div class="ttc" id="classtvm_1_1RangeNode_html_acd2ddcfab1f591a6c1f2568dd0180845"><div class="ttname"><a href="classtvm_1_1RangeNode.html#acd2ddcfab1f591a6c1f2568dd0180845">tvm::RangeNode::extent</a></div><div class="ttdeci">PrimExpr extent</div><div class="ttdoc">the extend of range </div><div class="ttdef"><b>Definition:</b> expr.h:467</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:489</div></div>
+<div class="ttc" id="classtvm_1_1RangeNode_html_acd2ddcfab1f591a6c1f2568dd0180845"><div class="ttname"><a href="classtvm_1_1RangeNode.html#acd2ddcfab1f591a6c1f2568dd0180845">tvm::RangeNode::extent</a></div><div class="ttdeci">PrimExpr extent</div><div class="ttdoc">the extent of the range </div><div class="ttdef"><b>Definition:</b> expr.h:681</div></div>
+<div class="ttc" id="namespacetvm_html_a7579d33e0aac9600dec46264a3f1edb8"><div class="ttname"><a href="namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8">tvm::operator &amp;&amp;</a></div><div class="ttdeci">PrimExpr operator &amp;&amp;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">and </div></div>
 <div class="ttc" id="ir_2span_8h_html"><div class="ttname"><a href="ir_2span_8h.html">span.h</a></div><div class="ttdoc">Span information for debugging purposes. </div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_a262bc171fdf52db5f34e3c8446fb81aa"><div class="ttname"><a href="classtvm_1_1Integer.html#a262bc171fdf52db5f34e3c8446fb81aa">tvm::Integer::Integer</a></div><div class="ttdeci">Integer(ObjectPtr&lt; Object &gt; node)</div><div class="ttdoc">constructor from node. </div><div class="ttdef"><b>Definition:</b> expr.h:410</div></div>
-<div class="ttc" id="classtvm_1_1FloatImm_html"><div class="ttname"><a href="classtvm_1_1FloatImm.html">tvm::FloatImm</a></div><div class="ttdoc">Managed reference class to FloatImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:350</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_a262bc171fdf52db5f34e3c8446fb81aa"><div class="ttname"><a href="classtvm_1_1Integer.html#a262bc171fdf52db5f34e3c8446fb81aa">tvm::Integer::Integer</a></div><div class="ttdeci">Integer(ObjectPtr&lt; Object &gt; node)</div><div class="ttdoc">constructor from node. </div><div class="ttdef"><b>Definition:</b> expr.h:624</div></div>
+<div class="ttc" id="classtvm_1_1FloatImm_html"><div class="ttname"><a href="classtvm_1_1FloatImm.html">tvm::FloatImm</a></div><div class="ttdoc">Managed reference class to FloatImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:564</div></div>
 <div class="ttc" id="classtvm_1_1AttrVisitor_html"><div class="ttname"><a href="classtvm_1_1AttrVisitor.html">tvm::AttrVisitor</a></div><div class="ttdoc">Visitor class to get the attributes of an AST/IR node. The content is going to be called for each fie...</div><div class="ttdef"><b>Definition:</b> reflection.h:52</div></div>
-<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Bool_01_4_html_a8afe0a2b788c12dcebec930db368f8cc"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Bool_01_4.html#a8afe0a2b788c12dcebec930db368f8cc">tvm::runtime::PackedFuncValueConverter&lt; tvm::Bool &gt;::From</a></div><div class="ttdeci">static tvm::Bool From(const TVMPODValue_ &amp;val)</div><div class="ttdef"><b>Definition:</b> expr.h:578</div></div>
-<div class="ttc" id="classtvm_1_1FloatImmNode_html_a0a43d0301a3e9e481b2c3ad723df9ea0"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html#a0a43d0301a3e9e481b2c3ad723df9ea0">tvm::FloatImmNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const FloatImmNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:332</div></div>
+<div class="ttc" id="namespacetvm_html_a2a1269a38e7e3621eb2906a47157106a"><div class="ttname"><a href="namespacetvm.html#a2a1269a38e7e3621eb2906a47157106a">tvm::operator &amp;</a></div><div class="ttdeci">PrimExpr operator &amp;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">take bitwise and of two values </div></div>
+<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Bool_01_4_html_a8afe0a2b788c12dcebec930db368f8cc"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Bool_01_4.html#a8afe0a2b788c12dcebec930db368f8cc">tvm::runtime::PackedFuncValueConverter&lt; tvm::Bool &gt;::From</a></div><div class="ttdeci">static tvm::Bool From(const TVMPODValue_ &amp;val)</div><div class="ttdef"><b>Definition:</b> expr.h:792</div></div>
+<div class="ttc" id="classtvm_1_1FloatImmNode_html_a0a43d0301a3e9e481b2c3ad723df9ea0"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html#a0a43d0301a3e9e481b2c3ad723df9ea0">tvm::FloatImmNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const FloatImmNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:546</div></div>
 <div class="ttc" id="classtvm_1_1VirtualDevice_html"><div class="ttname"><a href="classtvm_1_1VirtualDevice.html">tvm::VirtualDevice</a></div><div class="ttdoc">Managed reference class to VirtualDeviceNode. </div><div class="ttdef"><b>Definition:</b> virtual_device.h:261</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range container. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="classtvm_1_1Span_html"><div class="ttname"><a href="classtvm_1_1Span.html">tvm::Span</a></div><div class="ttdef"><b>Definition:</b> span.h:115</div></div>
+<div class="ttc" id="namespacetvm_html_ab354bf1270121abea71fade83f13b0b0"><div class="ttname"><a href="namespacetvm.html#ab354bf1270121abea71fade83f13b0b0">tvm::operator!</a></div><div class="ttdeci">PrimExpr operator!(PrimExpr a)</div><div class="ttdoc">not </div></div>
 <div class="ttc" id="classtvm_1_1BaseExprNode_html_ae3a0760a9f8b1379bc86f13e8bb3a22e"><div class="ttname"><a href="classtvm_1_1BaseExprNode.html#ae3a0760a9f8b1379bc86f13e8bb3a22e">tvm::BaseExprNode::span</a></div><div class="ttdeci">Span span</div><div class="ttdoc">Span that points to the original source code. Reserved debug information. </div><div class="ttdef"><b>Definition:</b> expr.h:55</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_abae4eff906166502c2a982e8a121d81d"><div class="ttname"><a href="classtvm_1_1Integer.html#abae4eff906166502c2a982e8a121d81d">tvm::Integer::IntValue</a></div><div class="ttdeci">int64_t IntValue() const</div><div class="ttdoc">convert to int64_t </div><div class="ttdef"><b>Definition:</b> expr.h:441</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_abae4eff906166502c2a982e8a121d81d"><div class="ttname"><a href="classtvm_1_1Integer.html#abae4eff906166502c2a982e8a121d81d">tvm::Integer::IntValue</a></div><div class="ttdeci">int64_t IntValue() const</div><div class="ttdoc">convert to int64_t </div><div class="ttdef"><b>Definition:</b> expr.h:655</div></div>
 <div class="ttc" id="classtvm_1_1BaseExprNode_html_a1c4db1562af2034749bc929ed00600a3"><div class="ttname"><a href="classtvm_1_1BaseExprNode.html#a1c4db1562af2034749bc929ed00600a3">tvm::BaseExprNode::_type_child_slots</a></div><div class="ttdeci">static constexpr const uint32_t _type_child_slots</div><div class="ttdef"><b>Definition:</b> expr.h:60</div></div>
 <div class="ttc" id="ir_2type_8h_html"><div class="ttname"><a href="ir_2type_8h.html">type.h</a></div><div class="ttdoc">IR/AST nodes for the unified type system in TVM. </div></div>
-<div class="ttc" id="classtvm_1_1GlobalVarNode_html_a66d9b6fa3bc4a3d62e0b91cafe93de8f"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html#a66d9b6fa3bc4a3d62e0b91cafe93de8f">tvm::GlobalVarNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:248</div></div>
-<div class="ttc" id="classtvm_1_1RangeNode_html_a53988be7b3181aa3b55eb991b615c48d"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a53988be7b3181aa3b55eb991b615c48d">tvm::RangeNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const RangeNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:481</div></div>
+<div class="ttc" id="classtvm_1_1GlobalVarNode_html_a66d9b6fa3bc4a3d62e0b91cafe93de8f"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html#a66d9b6fa3bc4a3d62e0b91cafe93de8f">tvm::GlobalVarNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:462</div></div>
+<div class="ttc" id="classtvm_1_1RangeNode_html_a53988be7b3181aa3b55eb991b615c48d"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a53988be7b3181aa3b55eb991b615c48d">tvm::RangeNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const RangeNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:695</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html">tvm::runtime::DataType</a></div><div class="ttdoc">Runtime primitive data type. </div><div class="ttdef"><b>Definition:</b> data_type.h:41</div></div>
 <div class="ttc" id="classtvm_1_1BaseExprNode_html"><div class="ttname"><a href="classtvm_1_1BaseExprNode.html">tvm::BaseExprNode</a></div><div class="ttdoc">Base type of all the expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:49</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a81ac7c3d0824529fddce7849c9c66289"><div class="ttname"><a href="namespacetvm_1_1relay.html#a81ac7c3d0824529fddce7849c9c66289">tvm::relay::GlobalVar</a></div><div class="ttdeci">tvm::GlobalVar GlobalVar</div><div class="ttdef"><b>Definition:</b> expr.h:58</div></div>
-<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01PrimExpr_01_4_html_aa071662c3084d7ad3322351cb44c3dbf"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01PrimExpr_01_4.html#aa071662c3084d7ad3322351cb44c3dbf">tvm::runtime::PackedFuncValueConverter&lt; PrimExpr &gt;::From</a></div><div class="ttdeci">static PrimExpr From(const TVMPODValue_ &amp;val)</div><div class="ttdef"><b>Definition:</b> expr.h:548</div></div>
+<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01PrimExpr_01_4_html_aa071662c3084d7ad3322351cb44c3dbf"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01PrimExpr_01_4.html#aa071662c3084d7ad3322351cb44c3dbf">tvm::runtime::PackedFuncValueConverter&lt; PrimExpr &gt;::From</a></div><div class="ttdeci">static PrimExpr From(const TVMPODValue_ &amp;val)</div><div class="ttdef"><b>Definition:</b> expr.h:762</div></div>
 <div class="ttc" id="classtvm_1_1BaseExprNode_html_a905dcf65204e877b6ccb977cf375f2a0"><div class="ttname"><a href="classtvm_1_1BaseExprNode.html#a905dcf65204e877b6ccb977cf375f2a0">tvm::BaseExprNode::_type_has_method_sequal_reduce</a></div><div class="ttdeci">static constexpr const bool _type_has_method_sequal_reduce</div><div class="ttdef"><b>Definition:</b> expr.h:58</div></div>
-<div class="ttc" id="classtvm_1_1FloatImmNode_html_a74569b541c1056734fff07a23a05558e"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html#a74569b541c1056734fff07a23a05558e">tvm::FloatImmNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:326</div></div>
-<div class="ttc" id="classtvm_1_1Bool_html_a00a5153c31270c6ec308a516cd46f7fb"><div class="ttname"><a href="classtvm_1_1Bool.html#a00a5153c31270c6ec308a516cd46f7fb">tvm::Bool::operator!</a></div><div class="ttdeci">Bool operator!() const</div><div class="ttdef"><b>Definition:</b> expr.h:372</div></div>
-<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:304</div></div>
-<div class="ttc" id="classtvm_1_1GlobalVar_html"><div class="ttname"><a href="classtvm_1_1GlobalVar.html">tvm::GlobalVar</a></div><div class="ttdoc">Managed reference to GlobalVarNode. </div><div class="ttdef"><b>Definition:</b> expr.h:261</div></div>
-<div class="ttc" id="classtvm_1_1RangeNode_html_a4bbc33969cb484c20306da1d2b9fa1fd"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a4bbc33969cb484c20306da1d2b9fa1fd">tvm::RangeNode::RangeNode</a></div><div class="ttdeci">RangeNode(PrimExpr min, PrimExpr extent, Span span=Span())</div><div class="ttdef"><b>Definition:</b> expr.h:472</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html_ad6ea21e1a03d972ac5cf81b80b88b2c4"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ad6ea21e1a03d972ac5cf81b80b88b2c4">tvm::RelayExprNode::virtual_device_</a></div><div class="ttdeci">ObjectRef virtual_device_</div><div class="ttdoc">The virtual device (VirtualDevice) for this node (the result of device planning). For first-order exp...</div><div class="ttdef"><b>Definition:</b> expr.h:193</div></div>
+<div class="ttc" id="classtvm_1_1FloatImmNode_html_a74569b541c1056734fff07a23a05558e"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html#a74569b541c1056734fff07a23a05558e">tvm::FloatImmNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:540</div></div>
+<div class="ttc" id="classtvm_1_1Bool_html_a00a5153c31270c6ec308a516cd46f7fb"><div class="ttname"><a href="classtvm_1_1Bool.html#a00a5153c31270c6ec308a516cd46f7fb">tvm::Bool::operator!</a></div><div class="ttdeci">Bool operator!() const</div><div class="ttdef"><b>Definition:</b> expr.h:586</div></div>
+<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:518</div></div>
+<div class="ttc" id="namespacetvm_html_af682776c3609284f1bc3ea436e21a67a"><div class="ttname"><a href="namespacetvm.html#af682776c3609284f1bc3ea436e21a67a">tvm::operator&lt;&lt;</a></div><div class="ttdeci">PrimExpr operator&lt;&lt;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">left shift operator </div></div>
+<div class="ttc" id="classtvm_1_1GlobalVar_html"><div class="ttname"><a href="classtvm_1_1GlobalVar.html">tvm::GlobalVar</a></div><div class="ttdoc">Managed reference to GlobalVarNode. </div><div class="ttdef"><b>Definition:</b> expr.h:475</div></div>
+<div class="ttc" id="namespacetvm_html_abd7d1b3232218b25e2e0cf6ef699a65f"><div class="ttname"><a href="namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f">tvm::operator^</a></div><div class="ttdeci">PrimExpr operator^(PrimExpr a, PrimExpr b)</div><div class="ttdoc">take bitwise xor of two values </div></div>
+<div class="ttc" id="classtvm_1_1RangeNode_html_a4bbc33969cb484c20306da1d2b9fa1fd"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a4bbc33969cb484c20306da1d2b9fa1fd">tvm::RangeNode::RangeNode</a></div><div class="ttdeci">RangeNode(PrimExpr min, PrimExpr extent, Span span=Span())</div><div class="ttdef"><b>Definition:</b> expr.h:686</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html_ad6ea21e1a03d972ac5cf81b80b88b2c4"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ad6ea21e1a03d972ac5cf81b80b88b2c4">tvm::RelayExprNode::virtual_device_</a></div><div class="ttdeci">ObjectRef virtual_device_</div><div class="ttdoc">The virtual device (VirtualDevice) for this node (the result of device planning). For first-order exp...</div><div class="ttdef"><b>Definition:</b> expr.h:407</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_a289a65bf255e3d192c60d7b5a8dcf097"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#a289a65bf255e3d192c60d7b5a8dcf097">tvm::runtime::TVMPODValue_::AsObjectRef</a></div><div class="ttdeci">TObjectRef AsObjectRef() const</div><div class="ttdef"><b>Definition:</b> packed_func.h:1823</div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html_a81f4c116ffb5931fdd64639eacad415d"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#a81f4c116ffb5931fdd64639eacad415d">tvm::IntImmNode::value</a></div><div class="ttdeci">int64_t value</div><div class="ttdoc">the Internal value. </div><div class="ttdef"><b>Definition:</b> expr.h:278</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html_a81f4c116ffb5931fdd64639eacad415d"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#a81f4c116ffb5931fdd64639eacad415d">tvm::IntImmNode::value</a></div><div class="ttdeci">int64_t value</div><div class="ttdoc">the Internal value. </div><div class="ttdef"><b>Definition:</b> expr.h:492</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html_a39ccfd3964e6d132ad8d4e4d544b5949"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#a39ccfd3964e6d132ad8d4e4d544b5949">tvm::IntImmNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:280</div></div>
-<div class="ttc" id="classtvm_1_1GlobalVarNode_html_ad9901ef1b92c8c604d98554c02b5c17e"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html#ad9901ef1b92c8c604d98554c02b5c17e">tvm::GlobalVarNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const GlobalVarNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:243</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_a1e099442e256b242fe8e2e2a49c1dc33"><div class="ttname"><a href="classtvm_1_1Integer.html#a1e099442e256b242fe8e2e2a49c1dc33">tvm::Integer::operator!=</a></div><div class="ttdeci">Bool operator!=(Enum other) const</div><div class="ttdef"><b>Definition:</b> expr.h:456</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html_a39ccfd3964e6d132ad8d4e4d544b5949"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#a39ccfd3964e6d132ad8d4e4d544b5949">tvm::IntImmNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:494</div></div>
+<div class="ttc" id="namespacetvm_html_a1ce1eb32fc9d76ebe5a6b8d185024d41"><div class="ttname"><a href="namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41">tvm::operator&gt;&gt;</a></div><div class="ttdeci">PrimExpr operator&gt;&gt;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">right shift operator </div></div>
+<div class="ttc" id="classtvm_1_1GlobalVarNode_html_ad9901ef1b92c8c604d98554c02b5c17e"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html#ad9901ef1b92c8c604d98554c02b5c17e">tvm::GlobalVarNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const GlobalVarNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:457</div></div>
+<div class="ttc" id="namespacetvm_html_a2ea3b45c96d3980227e418f7158ce5c3"><div class="ttname"><a href="namespacetvm.html#a2ea3b45c96d3980227e418f7158ce5c3">tvm::operator==</a></div><div class="ttdeci">PrimExpr operator==(PrimExpr a, PrimExpr b)</div><div class="ttdoc">equal </div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_a1e099442e256b242fe8e2e2a49c1dc33"><div class="ttname"><a href="classtvm_1_1Integer.html#a1e099442e256b242fe8e2e2a49c1dc33">tvm::Integer::operator!=</a></div><div class="ttdeci">Bool operator!=(Enum other) const</div><div class="ttdef"><b>Definition:</b> expr.h:670</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a661d95f170bca230773914caeef3fe52"><div class="ttname"><a href="namespacetvm_1_1relay.html#a661d95f170bca230773914caeef3fe52">tvm::relay::Type</a></div><div class="ttdeci">tvm::Type Type</div><div class="ttdef"><b>Definition:</b> type.h:47</div></div>
-<div class="ttc" id="namespacetvm_html_a8b12d0bb7c343e149ae0631a2577547b"><div class="ttname"><a href="namespacetvm.html#a8b12d0bb7c343e149ae0631a2577547b">tvm::operator==</a></div><div class="ttdeci">bool operator==(const Bool &amp;a, bool b)</div><div class="ttdef"><b>Definition:</b> expr.h:390</div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html_ab9008e3e269a0e136b040ba3c6a906e0"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#ab9008e3e269a0e136b040ba3c6a906e0">tvm::IntImmNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:290</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_a49869e0f187d66dcd00568d3e953fc04"><div class="ttname"><a href="classtvm_1_1Integer.html#a49869e0f187d66dcd00568d3e953fc04">tvm::Integer::operator!=</a></div><div class="ttdeci">Bool operator!=(int other) const</div><div class="ttdef"><b>Definition:</b> expr.h:450</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html_ab9008e3e269a0e136b040ba3c6a906e0"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#ab9008e3e269a0e136b040ba3c6a906e0">tvm::IntImmNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:504</div></div>
+<div class="ttc" id="namespacetvm_html_a5530417da455bd46f5dc55f27d69bcdf"><div class="ttname"><a href="namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf">tvm::operator&gt;=</a></div><div class="ttdeci">PrimExpr operator&gt;=(PrimExpr a, PrimExpr b)</div><div class="ttdoc">greater_equal </div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_a49869e0f187d66dcd00568d3e953fc04"><div class="ttname"><a href="classtvm_1_1Integer.html#a49869e0f187d66dcd00568d3e953fc04">tvm::Integer::operator!=</a></div><div class="ttdeci">Bool operator!=(int other) const</div><div class="ttdef"><b>Definition:</b> expr.h:664</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html">tvm::runtime::ObjectRef</a></div><div class="ttdoc">Base class of all object reference. </div><div class="ttdef"><b>Definition:</b> object.h:511</div></div>
 <div class="ttc" id="object_8h_html_af8330e3864503fb7c4133ae4d48fe4a2"><div class="ttname"><a href="object_8h.html#af8330e3864503fb7c4133ae4d48fe4a2">TVM_DEFINE_OBJECT_REF_COW_METHOD</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_COW_METHOD(ObjectName)</div><div class="ttdoc">Define CopyOnWrite function in an ObjectRef. </div><div class="ttdef"><b>Definition:</b> object.h:785</div></div>
-<div class="ttc" id="classtvm_1_1FloatImmNode_html_af941ed2d88c434c0ad443fa5dcd25e98"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html#af941ed2d88c434c0ad443fa5dcd25e98">tvm::FloatImmNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:336</div></div>
+<div class="ttc" id="classtvm_1_1FloatImmNode_html_af941ed2d88c434c0ad443fa5dcd25e98"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html#af941ed2d88c434c0ad443fa5dcd25e98">tvm::FloatImmNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:550</div></div>
+<div class="ttc" id="namespacetvm_html_a5c5034de2993b9130b7bd9d593a11bb5"><div class="ttname"><a href="namespacetvm.html#a5c5034de2993b9130b7bd9d593a11bb5">tvm::operator*</a></div><div class="ttdeci">PrimExpr operator*(PrimExpr a, PrimExpr b)</div><div class="ttdoc">multiplication operator </div></div>
 <div class="ttc" id="object_8h_html"><div class="ttname"><a href="object_8h.html">object.h</a></div><div class="ttdoc">A managed object in the TVM runtime. </div></div>
 <div class="ttc" id="object_8h_html_a3aea9b3f65aeb9150c0fa7800e5573c6"><div class="ttname"><a href="object_8h.html#a3aea9b3f65aeb9150c0fa7800e5573c6">TVM_DECLARE_FINAL_OBJECT_INFO</a></div><div class="ttdeci">#define TVM_DECLARE_FINAL_OBJECT_INFO(TypeName, ParentType)</div><div class="ttdoc">helper macro to declare type information in a final class. </div><div class="ttdef"><b>Definition:</b> object.h:671</div></div>
-<div class="ttc" id="classtvm_1_1RangeNode_html_ab845f7ed4ed85e360b730df3450d1aab"><div class="ttname"><a href="classtvm_1_1RangeNode.html#ab845f7ed4ed85e360b730df3450d1aab">tvm::RangeNode::RangeNode</a></div><div class="ttdeci">RangeNode()</div><div class="ttdoc">constructor </div><div class="ttdef"><b>Definition:</b> expr.h:471</div></div>
-<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Integer_01_4_html_a863930925e7354b87d35fde2ac310124"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Integer_01_4.html#a863930925e7354b87d35fde2ac310124">tvm::runtime::PackedFuncValueConverter&lt; tvm::Integer &gt;::From</a></div><div class="ttdeci">static tvm::Integer From(const TVMPODValue_ &amp;val)</div><div class="ttdef"><b>Definition:</b> expr.h:565</div></div>
-<div class="ttc" id="classtvm_1_1RangeNode_html_a4922785883bc78fdcdaa8169c85e63c9"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a4922785883bc78fdcdaa8169c85e63c9">tvm::RangeNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:475</div></div>
-<div class="ttc" id="classtvm_1_1GlobalVarNode_html_a903642f4abfde8fccc0144f97a25751e"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html#a903642f4abfde8fccc0144f97a25751e">tvm::GlobalVarNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:236</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html_aa6a49e1f6c40d4e24452114232bd1152"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#aa6a49e1f6c40d4e24452114232bd1152">tvm::RelayExprNode::type_as</a></div><div class="ttdeci">const TTypeNode * type_as() const</div><div class="ttdoc">Check if the inferred(checked) type of the Expr is backed by a TTypeNode and return it...</div><div class="ttdef"><b>Definition:</b> expr.h:530</div></div>
+<div class="ttc" id="classtvm_1_1RangeNode_html_ab845f7ed4ed85e360b730df3450d1aab"><div class="ttname"><a href="classtvm_1_1RangeNode.html#ab845f7ed4ed85e360b730df3450d1aab">tvm::RangeNode::RangeNode</a></div><div class="ttdeci">RangeNode()</div><div class="ttdoc">constructor </div><div class="ttdef"><b>Definition:</b> expr.h:685</div></div>
+<div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Integer_01_4_html_a863930925e7354b87d35fde2ac310124"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Integer_01_4.html#a863930925e7354b87d35fde2ac310124">tvm::runtime::PackedFuncValueConverter&lt; tvm::Integer &gt;::From</a></div><div class="ttdeci">static tvm::Integer From(const TVMPODValue_ &amp;val)</div><div class="ttdef"><b>Definition:</b> expr.h:779</div></div>
+<div class="ttc" id="classtvm_1_1RangeNode_html_a4922785883bc78fdcdaa8169c85e63c9"><div class="ttname"><a href="classtvm_1_1RangeNode.html#a4922785883bc78fdcdaa8169c85e63c9">tvm::RangeNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:689</div></div>
+<div class="ttc" id="classtvm_1_1GlobalVarNode_html_a903642f4abfde8fccc0144f97a25751e"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html#a903642f4abfde8fccc0144f97a25751e">tvm::GlobalVarNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:450</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html_aa6a49e1f6c40d4e24452114232bd1152"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#aa6a49e1f6c40d4e24452114232bd1152">tvm::RelayExprNode::type_as</a></div><div class="ttdeci">const TTypeNode * type_as() const</div><div class="ttdoc">Check if the inferred(checked) type of the Expr is backed by a TTypeNode and return it...</div><div class="ttdef"><b>Definition:</b> expr.h:744</div></div>
 <div class="ttc" id="classtvm_1_1PrimExprNode_html_a95af9234514ec5f11355db41524be7f9"><div class="ttname"><a href="classtvm_1_1PrimExprNode.html#a95af9234514ec5f11355db41524be7f9">tvm::PrimExprNode::dtype</a></div><div class="ttdeci">DataType dtype</div><div class="ttdoc">The runtime data type of the primitive expression. </div><div class="ttdef"><b>Definition:</b> expr.h:101</div></div>
 <div class="ttc" id="c__runtime__api_8h_html_a190e81769e805cca153514137a66e793ada22c3d9c62fa8f40d46e80ce83b96a1"><div class="ttname"><a href="c__runtime__api_8h.html#a190e81769e805cca153514137a66e793ada22c3d9c62fa8f40d46e80ce83b96a1">kTVMNullptr</a></div><div class="ttdef"><b>Definition:</b> c_runtime_api.h:114</div></div>
+<div class="ttc" id="namespacetvm_html_a18256ba1213ce5ff3cf8037a314354b7"><div class="ttname"><a href="namespacetvm.html#a18256ba1213ce5ff3cf8037a314354b7">tvm::operator/</a></div><div class="ttdeci">PrimExpr operator/(PrimExpr a, PrimExpr b)</div><div class="ttdoc">division operator </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_aefca71073146f4be36d6a4a0de33d6e0"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#aefca71073146f4be36d6a4a0de33d6e0">tvm::runtime::TVMPODValue_::type_code</a></div><div class="ttdeci">int type_code() const</div><div class="ttdef"><b>Definition:</b> packed_func.h:610</div></div>
-<div class="ttc" id="classtvm_1_1Bool_html_a35cd372573945f0dbb410fe54194c12e"><div class="ttname"><a href="classtvm_1_1Bool.html#a35cd372573945f0dbb410fe54194c12e">tvm::Bool::Bool</a></div><div class="ttdeci">Bool(bool value, Span span=Span())</div><div class="ttdef"><b>Definition:</b> expr.h:371</div></div>
+<div class="ttc" id="classtvm_1_1Bool_html_a35cd372573945f0dbb410fe54194c12e"><div class="ttname"><a href="classtvm_1_1Bool.html#a35cd372573945f0dbb410fe54194c12e">tvm::Bool::Bool</a></div><div class="ttdeci">Bool(bool value, Span span=Span())</div><div class="ttdef"><b>Definition:</b> expr.h:585</div></div>
+<div class="ttc" id="namespacetvm_html_a598f8139c469abc4066dbdd0a0a0845d"><div class="ttname"><a href="namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d">tvm::operator&lt;=</a></div><div class="ttdeci">PrimExpr operator&lt;=(PrimExpr a, PrimExpr b)</div><div class="ttdoc">less_equal </div></div>
 <div class="ttc" id="classtvm_1_1Type_html"><div class="ttname"><a href="classtvm_1_1Type.html">tvm::Type</a></div><div class="ttdoc">Managed reference to TypeNode. </div><div class="ttdef"><b>Definition:</b> type.h:93</div></div>
 <div class="ttc" id="classtvm_1_1BaseExprNode_html_a831794c879d47c30cdd9cefd4f05d533"><div class="ttname"><a href="classtvm_1_1BaseExprNode.html#a831794c879d47c30cdd9cefd4f05d533">tvm::BaseExprNode::TVM_DECLARE_BASE_OBJECT_INFO</a></div><div class="ttdeci">TVM_DECLARE_BASE_OBJECT_INFO(BaseExprNode, Object)</div></div>
+<div class="ttc" id="namespacetvm_html_a354b9954ff25dd819a51d856fdd38827"><div class="ttname"><a href="namespacetvm.html#a354b9954ff25dd819a51d856fdd38827">tvm::operator~</a></div><div class="ttdeci">PrimExpr operator~(PrimExpr a)</div><div class="ttdoc">take bitwise negation of two values </div></div>
 <div class="ttc" id="classtvm_1_1PrimExpr_html"><div class="ttname"><a href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a></div><div class="ttdoc">Reference to PrimExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:112</div></div>
-<div class="ttc" id="classtvm_1_1GlobalVarNode_html"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html">tvm::GlobalVarNode</a></div><div class="ttdoc">Global variable that lives in the top-level module. </div><div class="ttdef"><b>Definition:</b> expr.h:231</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:145</div></div>
+<div class="ttc" id="classtvm_1_1GlobalVarNode_html"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html">tvm::GlobalVarNode</a></div><div class="ttdoc">Global variable that lives in the top-level module. </div><div class="ttdef"><b>Definition:</b> expr.h:445</div></div>
+<div class="ttc" id="namespacetvm_html_a236d9aae385e6697874f75e4c8a69f8d"><div class="ttname"><a href="namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d">tvm::operator|</a></div><div class="ttdeci">PrimExpr operator|(PrimExpr a, PrimExpr b)</div><div class="ttdoc">take bitwise or of two values </div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:359</div></div>
 <div class="ttc" id="object_8h_html_a782d0de62fbf75736e29c1e79c22c7f1"><div class="ttname"><a href="object_8h.html#a782d0de62fbf75736e29c1e79c22c7f1">TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:728</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html_ad2291d037ff36f5371f6381478b3eeaf"><div class="ttname"><a href="classtvm_1_1Integer.html#ad2291d037ff36f5371f6381478b3eeaf">tvm::Integer::operator==</a></div><div class="ttdeci">Bool operator==(Enum other) const</div><div class="ttdef"><b>Definition:</b> expr.h:452</div></div>
+<div class="ttc" id="namespacetvm_html_af246f441d4ac21b110185b77240b2dcc"><div class="ttname"><a href="namespacetvm.html#af246f441d4ac21b110185b77240b2dcc">tvm::operator+</a></div><div class="ttdeci">PrimExpr operator+(PrimExpr a, PrimExpr b)</div><div class="ttdoc">add operator </div></div>
+<div class="ttc" id="classtvm_1_1Integer_html_ad2291d037ff36f5371f6381478b3eeaf"><div class="ttname"><a href="classtvm_1_1Integer.html#ad2291d037ff36f5371f6381478b3eeaf">tvm::Integer::operator==</a></div><div class="ttdeci">Bool operator==(Enum other) const</div><div class="ttdef"><b>Definition:</b> expr.h:666</div></div>
 <div class="ttc" id="structtvm_1_1runtime_1_1PackedFuncValueConverter_html"><div class="ttname"><a href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html">tvm::runtime::PackedFuncValueConverter</a></div><div class="ttdoc">Type trait to specify special value conversion rules from TVMArgValue and TVMRetValue. </div><div class="ttdef"><b>Definition:</b> packed_func.h:1096</div></div>
 <div class="ttc" id="classtvm_1_1PrimExprNode_html"><div class="ttname"><a href="classtvm_1_1PrimExprNode.html">tvm::PrimExprNode</a></div><div class="ttdoc">Base node of all primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:85</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
-<div class="ttc" id="classtvm_1_1RangeNode_html"><div class="ttname"><a href="classtvm_1_1RangeNode.html">tvm::RangeNode</a></div><div class="ttdoc">range over one dimension </div><div class="ttdef"><b>Definition:</b> expr.h:462</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
+<div class="ttc" id="namespacetvm_html_ad93d00f7b080dc3f905f5c34c170a041"><div class="ttname"><a href="namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041">tvm::operator&gt;</a></div><div class="ttdeci">PrimExpr operator&gt;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">greater </div></div>
+<div class="ttc" id="classtvm_1_1RangeNode_html"><div class="ttname"><a href="classtvm_1_1RangeNode.html">tvm::RangeNode</a></div><div class="ttdoc">range over one dimension </div><div class="ttdef"><b>Definition:</b> expr.h:676</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/ir_2function_8h_source.html b/docs/reference/api/doxygen/ir_2function_8h_source.html
index c7a73288e..1a74f80a6 100644
--- a/docs/reference/api/doxygen/ir_2function_8h_source.html
+++ b/docs/reference/api/doxygen/ir_2function_8h_source.html
@@ -80,7 +80,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1attr_html_a7737d03caeeaeac61531ace9a91f7d74"><div class="ttname"><a href="namespacetvm_1_1attr.html#a7737d03caeeaeac61531ace9a91f7d74">tvm::attr::kGlobalSymbol</a></div><div class="ttdeci">constexpr const char * kGlobalSymbol</div><div class="ttdoc">Global linker symbol of the function in generated code. </div><div class="ttdef"><b>Definition:</b> function.h:191</div></div>
 <div class="ttc" id="classtvm_1_1DictAttrs_html_a4bff303c0d4e87ed01ffe4cdb36b05d0"><div class="ttname"><a href="classtvm_1_1DictAttrs.html#a4bff303c0d4e87ed01ffe4cdb36b05d0">tvm::DictAttrs::GetAttr</a></div><div class="ttdeci">Optional&lt; TObjectRef &gt; GetAttr(const std::string &amp;attr_key, Optional&lt; TObjectRef &gt; default_value=Optional&lt; TObjectRef &gt;(nullptr)) const</div><div class="ttdoc">Get a function attribute. </div><div class="ttdef"><b>Definition:</b> attrs.h:259</ [...]
 <div class="ttc" id="namespacetvm_html_a85e4536ea4e1c8d3d48c61135f0a8ff0"><div class="ttname"><a href="namespacetvm.html#a85e4536ea4e1c8d3d48c61135f0a8ff0">tvm::CallingConv</a></div><div class="ttdeci">CallingConv</div><div class="ttdoc">Possible Calling conventions. </div><div class="ttdef"><b>Definition:</b> function.h:44</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
 <div class="ttc" id="namespacetvm_html_a85e4536ea4e1c8d3d48c61135f0a8ff0aea7c0ea92da6200bb14ea17a6094e2bc"><div class="ttname"><a href="namespacetvm.html#a85e4536ea4e1c8d3d48c61135f0a8ff0aea7c0ea92da6200bb14ea17a6094e2bc">tvm::CallingConv::kDeviceKernelLaunch</a></div><div class="ttdoc">Device kernel launch. </div></div>
 <div class="ttc" id="classtvm_1_1BaseFuncNode_html_a1fbf773665334f7d109c7c4c12d40d76"><div class="ttname"><a href="classtvm_1_1BaseFuncNode.html#a1fbf773665334f7d109c7c4c12d40d76">tvm::BaseFuncNode::HasNonzeroAttr</a></div><div class="ttdeci">bool HasNonzeroAttr(const std::string &amp;attr_key) const</div><div class="ttdoc">Check whether the function has an non-zero integer attr. </div><div class="ttdef"><b>Definition:</b> function.h:132</div></div>
@@ -90,7 +90,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1BaseFuncNode_html_a289921be4c78cc70bd34615edce7a820"><div class="ttname"><a href="classtvm_1_1BaseFuncNode.html#a289921be4c78cc70bd34615edce7a820">tvm::BaseFuncNode::GetAttr</a></div><div class="ttdeci">Optional&lt; TObjectRef &gt; GetAttr(const std::string &amp;attr_key, Optional&lt; TObjectRef &gt; default_value=Optional&lt; TObjectRef &gt;(nullptr)) const</div><div class="ttdoc">Get a function attribute. </div><div class="ttdef"><b>Definition:</b> func [...]
 <div class="ttc" id="classtvm_1_1BaseFunc_html"><div class="ttname"><a href="classtvm_1_1BaseFunc.html">tvm::BaseFunc</a></div><div class="ttdoc">Managed reference to BaseFuncNode. </div><div class="ttdef"><b>Definition:</b> function.h:143</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Optional_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Optional.html">tvm::runtime::Optional</a></div><div class="ttdoc">Optional container that to represent to a Nullable variant of T. </div><div class="ttdef"><b>Definition:</b> optional.h:51</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:145</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:359</div></div>
 <div class="ttc" id="object_8h_html_ac2b7418e9549512b5db0126cf2a716f1"><div class="ttname"><a href="object_8h.html#ac2b7418e9549512b5db0126cf2a716f1">TVM_DECLARE_BASE_OBJECT_INFO</a></div><div class="ttdeci">#define TVM_DECLARE_BASE_OBJECT_INFO(TypeName, ParentType)</div><div class="ttdoc">helper macro to declare a base object type that can be inherited. </div><div class="ttdef"><b>Definition:</b> object.h:648</div></div>
 <div class="ttc" id="classtvm_1_1DictAttrs_html_a45e524ce0f8ae389341ec37e468d887d"><div class="ttname"><a href="classtvm_1_1DictAttrs.html#a45e524ce0f8ae389341ec37e468d887d">tvm::DictAttrs::HasNonzeroAttr</a></div><div class="ttdeci">bool HasNonzeroAttr(const std::string &amp;attr_key) const</div><div class="ttdoc">Check whether the function has an non-zero integer attr. </div><div class="ttdef"><b>Definition:</b> attrs.h:298</div></div>
 </div><!-- fragment --></div><!-- contents -->
diff --git a/docs/reference/api/doxygen/ir_2module_8h_source.html b/docs/reference/api/doxygen/ir_2module_8h_source.html
index bbd21aaff..5492a06d9 100644
--- a/docs/reference/api/doxygen/ir_2module_8h_source.html
+++ b/docs/reference/api/doxygen/ir_2module_8h_source.html
@@ -71,7 +71,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1attr_html_a688274c1d1bdf64c3a44c373c58ab06d"><div class="ttname"><a href="namespacetvm_1_1attr.html#a688274c1d1bdf64c3a44c373c58ab06d">tvm::attr::kExecutor</a></div><div class="ttdeci">constexpr const char * kExecutor</div><div class="ttdoc">Executor targeted by the module. </div><div class="ttdef"><b>Definition:</b> module.h:491</div></div>
 <div class="ttc" id="classtvm_1_1IRModuleNode_html_affbad8fa2513bd33cf8ac7d95aee132e"><div class="ttname"><a href="classtvm_1_1IRModuleNode.html#affbad8fa2513bd33cf8ac7d95aee132e">tvm::IRModuleNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> module.h:121</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectPtr_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectPtr.html">tvm::runtime::ObjectPtr</a></div><div class="ttdoc">A custom smart pointer for Object. </div><div class="ttdef"><b>Definition:</b> object.h:358</div></div>
-<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:369</div></div>
+<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:583</div></div>
 <div class="ttc" id="classtvm_1_1IRModuleNode_html_a4840f698deaffe0e96317a436dfd079f"><div class="ttname"><a href="classtvm_1_1IRModuleNode.html#a4840f698deaffe0e96317a436dfd079f">tvm::IRModuleNode::TVM_DECLARE_FINAL_OBJECT_INFO</a></div><div class="ttdeci">TVM_DECLARE_FINAL_OBJECT_INFO(IRModuleNode, Object)</div></div>
 <div class="ttc" id="namespacetvm_html_ad0e4314dd85303aa5e10d452e4c9db54"><div class="ttname"><a href="namespacetvm.html#ad0e4314dd85303aa5e10d452e4c9db54">tvm::AsText</a></div><div class="ttdeci">String AsText(const ObjectRef &amp;node, bool show_meta_data=true, runtime::TypedPackedFunc&lt; String(ObjectRef)&gt; annotate=nullptr)</div><div class="ttdoc">Render the node as a string in the text format. </div></div>
 <div class="ttc" id="classtvm_1_1IRModuleNode_html_ad4267e40c959ea60f138b26d3276db88"><div class="ttname"><a href="classtvm_1_1IRModuleNode.html#ad4267e40c959ea60f138b26d3276db88">tvm::IRModuleNode::ImportFromStd</a></div><div class="ttdeci">void ImportFromStd(const String &amp;path)</div><div class="ttdoc">Import Relay code from the file at path, relative to the standard library. </div></div>
@@ -114,12 +114,12 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1IRModuleNode_html_ae078ad8def39579701d144578c787bcf"><div class="ttname"><a href="classtvm_1_1IRModuleNode.html#ae078ad8def39579701d144578c787bcf">tvm::IRModuleNode::LookupTag</a></div><div class="ttdeci">Constructor LookupTag(const int32_t tag)</div><div class="ttdoc">Look up a constructor by its tag. </div></div>
 <div class="ttc" id="classtvm_1_1IRModuleNode_html_a2156216ccb73863c32a381a14319f71e"><div class="ttname"><a href="classtvm_1_1IRModuleNode.html#a2156216ccb73863c32a381a14319f71e">tvm::IRModuleNode::GetGlobalTypeVars</a></div><div class="ttdeci">Array&lt; GlobalTypeVar &gt; GetGlobalTypeVars() const</div><div class="ttdoc">Collect all global type vars defined in this module. </div></div>
-<div class="ttc" id="classtvm_1_1GlobalVar_html"><div class="ttname"><a href="classtvm_1_1GlobalVar.html">tvm::GlobalVar</a></div><div class="ttdoc">Managed reference to GlobalVarNode. </div><div class="ttdef"><b>Definition:</b> expr.h:261</div></div>
+<div class="ttc" id="classtvm_1_1GlobalVar_html"><div class="ttname"><a href="classtvm_1_1GlobalVar.html">tvm::GlobalVar</a></div><div class="ttdoc">Managed reference to GlobalVarNode. </div><div class="ttdef"><b>Definition:</b> expr.h:475</div></div>
 <div class="ttc" id="classtvm_1_1IRModule_html_a2ea8b99b923647503f8213f22a0f6d7e"><div class="ttname"><a href="classtvm_1_1IRModule.html#a2ea8b99b923647503f8213f22a0f6d7e">tvm::IRModule::operator-&gt;</a></div><div class="ttdeci">IRModuleNode * operator-&gt;() const</div><div class="ttdef"><b>Definition:</b> module.h:383</div></div>
 <div class="ttc" id="classtvm_1_1IRModule_html_a69a94bf1f2e54191598b2271443d2696"><div class="ttname"><a href="classtvm_1_1IRModule.html#a69a94bf1f2e54191598b2271443d2696">tvm::IRModule::IRModule</a></div><div class="ttdeci">IRModule(ObjectPtr&lt; Object &gt; n)</div><div class="ttdoc">constructor </div><div class="ttdef"><b>Definition:</b> module.h:381</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc.html">tvm::runtime::TypedPackedFunc</a></div><div class="ttdoc">Please refer to TypedPackedFunc&lt;R(Args..)&gt;. </div><div class="ttdef"><b>Definition:</b> packed_func.h:60</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="classtvm_1_1IRModuleNode_html_a2b30d3e406c01bb6287d1d7514ecaf46"><div class="ttname"><a href="classtvm_1_1IRModuleNode.html#a2b30d3e406c01bb6287d1d7514ecaf46">tvm::IRModuleNode::IRModule</a></div><div class="ttdeci">friend class IRModule</div><div class="ttdef"><b>Definition:</b> module.h:353</div></div>
 <div class="ttc" id="classtvm_1_1IRModuleNode_html_a86bbdc4b857ce5958a2b5f29e1d6fcb6"><div class="ttname"><a href="classtvm_1_1IRModuleNode.html#a86bbdc4b857ce5958a2b5f29e1d6fcb6">tvm::IRModuleNode::ShallowCopy</a></div><div class="ttdeci">IRModule ShallowCopy()</div><div class="ttdoc">Create a shallow copy of this IRModule. </div></div>
 <div class="ttc" id="ir_2adt_8h_html"><div class="ttname"><a href="ir_2adt_8h.html">adt.h</a></div><div class="ttdoc">Algebraic data type definitions. </div></div>
diff --git a/docs/reference/api/doxygen/ir_2op_8h_source.html b/docs/reference/api/doxygen/ir_2op_8h_source.html
index 16c633c78..560359bb3 100644
--- a/docs/reference/api/doxygen/ir_2op_8h_source.html
+++ b/docs/reference/api/doxygen/ir_2op_8h_source.html
@@ -115,7 +115,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1relay_html_a63321eb51080f3f57dd7563a3ca0bfa6"><div class="ttname"><a href="namespacetvm_1_1relay.html#a63321eb51080f3f57dd7563a3ca0bfa6">tvm::relay::TypeVar</a></div><div class="ttdeci">tvm::TypeVar TypeVar</div><div class="ttdef"><b>Definition:</b> type.h:49</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc.html">tvm::runtime::TypedPackedFunc</a></div><div class="ttdoc">Please refer to TypedPackedFunc&lt;R(Args..)&gt;. </div><div class="ttdef"><b>Definition:</b> packed_func.h:60</div></div>
 <div class="ttc" id="classtvm_1_1OpRegEntry_html_a0c6a92638cb8252abb4ddd55e7c9380f"><div class="ttname"><a href="classtvm_1_1OpRegEntry.html#a0c6a92638cb8252abb4ddd55e7c9380f">tvm::OpRegEntry::set_attrs_type_key</a></div><div class="ttdeci">OpRegEntry &amp; set_attrs_type_key(const String &amp;key)</div><div class="ttdoc">Set the attrs type key and index to be AttrsType. </div><div class="ttdef"><b>Definition:</b> op.h:463</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="classtvm_1_1Op_html"><div class="ttname"><a href="classtvm_1_1Op.html">tvm::Op</a></div><div class="ttdoc">Managed reference class to OpNode. </div><div class="ttdef"><b>Definition:</b> op.h:165</div></div>
 <div class="ttc" id="classtvm_1_1AttrRegistryMap_html"><div class="ttname"><a href="classtvm_1_1AttrRegistryMap.html">tvm::AttrRegistryMap</a></div><div class="ttdoc">Map&lt;Key, ValueType&gt; used to store meta-data. </div><div class="ttdef"><b>Definition:</b> attr_registry_map.h:101</div></div>
 <div class="ttc" id="classtvm_1_1OpNode_html_a9a5aacb190b2c4ead60b2810bd9053f4"><div class="ttname"><a href="classtvm_1_1OpNode.html#a9a5aacb190b2c4ead60b2810bd9053f4">tvm::OpNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> op.h:93</div></div>
@@ -137,7 +137,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1OpNode_html_a4c8ae999677faeb27ff8d8fc32e7b397"><div class="ttname"><a href="classtvm_1_1OpNode.html#a4c8ae999677faeb27ff8d8fc32e7b397">tvm::OpNode::attrs_type_key</a></div><div class="ttdeci">String attrs_type_key</div><div class="ttdoc">The type key of the attribute field This can be empty, in which case it defaults to anything...</div><div class="ttdef"><b>Definition:</b> op.h:75</div></div>
 <div class="ttc" id="classtvm_1_1OpNode_html_aa13505588162bc5448bd03f02f5c545b"><div class="ttname"><a href="classtvm_1_1OpNode.html#aa13505588162bc5448bd03f02f5c545b">tvm::OpNode::num_inputs</a></div><div class="ttdeci">int32_t num_inputs</div><div class="ttdoc">number of input arguments to the operator, -1 means it is variable length </div><div class="ttdef"><b>Definition:</b> op.h:85</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a2d76fa1fb628ff276a284e61123589c5"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a2d76fa1fb628ff276a284e61123589c5">tvm::runtime::ObjectRef::as</a></div><div class="ttdeci">const ObjectType * as() const</div><div class="ttdoc">Try to downcast the internal Object to a raw pointer of a corresponding type. </div><div class="ttdef"><b>Definition:</b> object.h:865</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:145</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:359</div></div>
 <div class="ttc" id="classtvm_1_1OpRegEntry_html_a5b6f62edc2c1718fbc98340524526f9c"><div class="ttname"><a href="classtvm_1_1OpRegEntry.html#a5b6f62edc2c1718fbc98340524526f9c">tvm::OpRegEntry::add_type_rel</a></div><div class="ttdeci">OpRegEntry &amp; add_type_rel(const std::string &amp;rel_name, runtime::TypedPackedFunc&lt; bool(const Array&lt; Type &gt; &amp;, int, const Attrs &amp;, const TypeReporter &amp;)&gt; type_rel_func)</div><div class="ttdoc">Attach the type function correspon [...]
 <div class="ttc" id="classtvm_1_1Op_html_abaafec14f5f05cc8bd3cdbf99eeb53d5"><div class="ttname"><a href="classtvm_1_1Op.html#abaafec14f5f05cc8bd3cdbf99eeb53d5">tvm::Op::Op</a></div><div class="ttdeci">Op(ObjectPtr&lt; Object &gt; n)</div><div class="ttdoc">constructor from node pointer </div><div class="ttdef"><b>Definition:</b> op.h:170</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_a6841f97e06e6614dd7e82c6dd41b818a"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#a6841f97e06e6614dd7e82c6dd41b818a">tvm::runtime::Object::TypeKey2Index</a></div><div class="ttdeci">static uint32_t TypeKey2Index(const std::string &amp;key)</div><div class="ttdoc">Get the type index of the corresponding key from runtime. </div></div>
diff --git a/docs/reference/api/doxygen/local__response__norm_8h_source.html b/docs/reference/api/doxygen/local__response__norm_8h_source.html
index 1f3d4299c..0ede616ce 100644
--- a/docs/reference/api/doxygen/local__response__norm_8h_source.html
+++ b/docs/reference/api/doxygen/local__response__norm_8h_source.html
@@ -67,14 +67,14 @@ $(function() {
 </div><!--header-->
 <div class="contents">
 <a href="local__response__norm_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> [...]
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="namespacetvm_1_1te_html"><div class="ttname"><a href="namespacetvm_1_1te.html">tvm::te</a></div><div class="ttdoc">Tensor expression language DSL. </div><div class="ttdef"><b>Definition:</b> extracted_task.h:33</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Var_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Var.html">tvm::tir::Var</a></div><div class="ttdoc">a named variable in TIR </div><div class="ttdef"><b>Definition:</b> var.h:88</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_1_1nn_html_ac3d8de9144f915a5dd27dcb70c0abdd4"><div class="ttname"><a href="namespacetvm_1_1topi_1_1nn.html#ac3d8de9144f915a5dd27dcb70c0abdd4">tvm::topi::nn::lrn</a></div><div class="ttdeci">Tensor lrn(const Tensor &amp;data, int size, int axis=1, float alpha=0.0001, float beta=0.75, float bias=2, std::string name=&quot;tensor&quot;, std::string tag=kBroadcast)</div><div class="ttdoc">Local response normalization inference operator. </div><div cla [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_ab5db2ee9a8be71931324dac552be24c4"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#ab5db2ee9a8be71931324dac552be24c4">tvm::runtime::Array::Set</a></div><div class="ttdeci">void Set(int64_t i, T value)</div><div class="ttdoc">set i-th element of the array. </div><div class="ttdef"><b>Definition:</b> array.h:567</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a13aaf23f0ab77f1ed4a7d4b7816bf210"><div class="ttname"><a href="namespacetvm_1_1topi.html#a13aaf23f0ab77f1ed4a7d4b7816bf210">tvm::topi::kBroadcast</a></div><div class="ttdeci">constexpr auto kBroadcast</div><div class="ttdef"><b>Definition:</b> tags.h:36</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="namespacetvm_html_a16f9cd9219b505e2cc05c5a7558ac61f"><div class="ttname"><a href="namespacetvm.html#a16f9cd9219b505e2cc05c5a7558ac61f">tvm::div</a></div><div class="ttdeci">PrimExpr div(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute division in C semantics. </div></div>
 <div class="ttc" id="namespacetvm_html_afdad0c0329bd39949ba8d296cfb85d76"><div class="ttname"><a href="namespacetvm.html#afdad0c0329bd39949ba8d296cfb85d76">tvm::sum</a></div><div class="ttdeci">PrimExpr sum(PrimExpr source, Array&lt; tir::IterVar &gt; axis, Array&lt; PrimExpr &gt; init={}, Span span=Span())</div><div class="ttdoc">sum of of source expression over axis </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
diff --git a/docs/reference/api/doxygen/memory__pools_8h_source.html b/docs/reference/api/doxygen/memory__pools_8h_source.html
index bea05e084..2898cdb2a 100644
--- a/docs/reference/api/doxygen/memory__pools_8h_source.html
+++ b/docs/reference/api/doxygen/memory__pools_8h_source.html
@@ -68,7 +68,7 @@ $(function() {
 <div class="contents">
 <a href="memory__pools_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or mo [...]
 <div class="ttc" id="structtvm_1_1PoolInfoPropertiesNode_html_ad1a7d2d59d20c9a7761e548fcb6545f6"><div class="ttname"><a href="structtvm_1_1PoolInfoPropertiesNode.html#ad1a7d2d59d20c9a7761e548fcb6545f6">tvm::PoolInfoPropertiesNode::read_latency_cycles</a></div><div class="ttdeci">Integer read_latency_cycles</div><div class="ttdoc">The read latency in cycles. </div><div class="ttdef"><b>Definition:</b> memory_pools.h:161</div></div>
-<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:369</div></div>
+<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:583</div></div>
 <div class="ttc" id="classtvm_1_1WorkspacePoolInfo_html"><div class="ttname"><a href="classtvm_1_1WorkspacePoolInfo.html">tvm::WorkspacePoolInfo</a></div><div class="ttdef"><b>Definition:</b> memory_pools.h:235</div></div>
 <div class="ttc" id="structtvm_1_1WorkspaceMemoryPoolsNode_html_a8bda8248a09b413d9af0cc1c9a3acc72"><div class="ttname"><a href="structtvm_1_1WorkspaceMemoryPoolsNode.html#a8bda8248a09b413d9af0cc1c9a3acc72">tvm::WorkspaceMemoryPoolsNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> memory_pools.h:324</div></div>
 <div class="ttc" id="structtvm_1_1ConstantInfoNode_html_a054eb75fb628ae03d5cb4c4c7f7e8846"><div class="ttname"><a href="structtvm_1_1ConstantInfoNode.html#a054eb75fb628ae03d5cb4c4c7f7e8846">tvm::ConstantInfoNode::name_hint</a></div><div class="ttdeci">String name_hint</div><div class="ttdef"><b>Definition:</b> memory_pools.h:248</div></div>
@@ -147,7 +147,7 @@ $(function() {
 <div class="ttc" id="structtvm_1_1ConstantPoolInfoNode_html_abdfca3b12874ad29d203107c7ca8dcb5"><div class="ttname"><a href="structtvm_1_1ConstantPoolInfoNode.html#abdfca3b12874ad29d203107c7ca8dcb5">tvm::ConstantPoolInfoNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> memory_pools.h:297</div></div>
 <div class="ttc" id="registry_8h_html"><div class="ttname"><a href="registry_8h.html">registry.h</a></div><div class="ttdoc">This file defines the TVM global function registry. </div></div>
 <div class="ttc" id="structtvm_1_1WorkspacePoolInfoNode_html"><div class="ttname"><a href="structtvm_1_1WorkspacePoolInfoNode.html">tvm::WorkspacePoolInfoNode</a></div><div class="ttdef"><b>Definition:</b> memory_pools.h:222</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/namespacemembers_func_o.html b/docs/reference/api/doxygen/namespacemembers_func_o.html
index e74cee61d..5714be061 100644
--- a/docs/reference/api/doxygen/namespacemembers_func_o.html
+++ b/docs/reference/api/doxygen/namespacemembers_func_o.html
@@ -69,7 +69,7 @@ $(function() {
 , <a class="el" href="namespacetvm_1_1topi.html#a69bc76d169f422bffc6e0ee84afcea87">tvm::topi</a>
 </li>
 <li>operator &amp;&amp;()
-: <a class="el" href="namespacetvm.html#a242b37bc39f3fc56d29e36f916cc1483">tvm</a>
+: <a class="el" href="namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8">tvm</a>
 , <a class="el" href="namespacetvm_1_1topi.html#adf6412ce526876d0b6fd743dba8ba40e">tvm::topi</a>
 </li>
 <li>operator!()
@@ -78,7 +78,7 @@ $(function() {
 </li>
 <li>operator!=()
 : <a class="el" href="namespacetvm.html#a03983cf66713724c138f9697bb8e0e97">tvm</a>
-, <a class="el" href="namespacetvm_1_1runtime.html#ab556e58d616b05ce4ae338f887b17cb4">tvm::runtime</a>
+, <a class="el" href="namespacetvm_1_1runtime.html#a7d584f14566754b91a0e7f415b83fbda">tvm::runtime</a>
 , <a class="el" href="namespacetvm_1_1te.html#a8953e862224184536c2a94d773abf62d">tvm::te</a>
 </li>
 <li>operator%()
@@ -105,8 +105,8 @@ $(function() {
 : <a class="el" href="namespacetvm.html#a96061840d12f84eeecc8fae11e245242">tvm</a>
 </li>
 <li>operator-()
-: <a class="el" href="namespacetvm.html#abde487c0197942c4ebb1b47277b89dac">tvm</a>
-, <a class="el" href="namespacetvm_1_1te.html#a2f041d85fc4eb81c053099cbbb617d1d">tvm::te</a>
+: <a class="el" href="namespacetvm.html#abc417454badf61b154d6a8d87cd8f171">tvm</a>
+, <a class="el" href="namespacetvm_1_1te.html#a7cbaaf91ad0e43d83d88a522fcb4214b">tvm::te</a>
 , <a class="el" href="namespacetvm_1_1topi.html#a4e3b2e9b0b412291f2b26ac897870a91">tvm::topi</a>
 </li>
 <li>operator-=()
@@ -126,47 +126,47 @@ $(function() {
 <li>operator&lt;&lt;()
 : <a class="el" href="namespacetvm.html#af682776c3609284f1bc3ea436e21a67a">tvm</a>
 , <a class="el" href="namespacetvm_1_1runtime.html#af22b89284299c81d0c1802199af446d7">tvm::runtime</a>
-, <a class="el" href="namespacetvm_1_1te.html#a6bb44656b78b7d6a02ede706ed0a85ec">tvm::te</a>
+, <a class="el" href="namespacetvm_1_1te.html#afba65b87ed8a5587c48b1f63ff9d8437">tvm::te</a>
 , <a class="el" href="namespacetvm_1_1tir.html#aba58d59be99ed4026f32b0c10f690929">tvm::tir</a>
 , <a class="el" href="namespacetvm_1_1topi.html#a13eb3768682ba9bc6cec7022849ed021">tvm::topi</a>
 </li>
 <li>operator&lt;=()
-: <a class="el" href="namespacetvm.html#af94a56db543e741a23bbf2f51c49091a">tvm</a>
-, <a class="el" href="namespacetvm_1_1runtime.html#a2e76c697beb4a77556a869f7cc45f09a">tvm::runtime</a>
-, <a class="el" href="namespacetvm_1_1te.html#a155868a829cdec5e04c00fee9fd6b8ab">tvm::te</a>
+: <a class="el" href="namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d">tvm</a>
+, <a class="el" href="namespacetvm_1_1runtime.html#a1d696c920a17b8c54775705062de75be">tvm::runtime</a>
+, <a class="el" href="namespacetvm_1_1te.html#ae7fe819e0a6e9615e65cabfe5058b498">tvm::te</a>
 </li>
 <li>operator==()
-: <a class="el" href="namespacetvm.html#a2ea3b45c96d3980227e418f7158ce5c3">tvm</a>
-, <a class="el" href="namespacetvm_1_1runtime.html#a5a5995094218eac7fe1f98e1abee852d">tvm::runtime</a>
-, <a class="el" href="namespacetvm_1_1te.html#a640a91e87c3d3a55c5df248c1a194b90">tvm::te</a>
+: <a class="el" href="namespacetvm.html#a4368d3ec623c65b3626af11a1a7446ca">tvm</a>
+, <a class="el" href="namespacetvm_1_1runtime.html#aba04626a0c1e717679d673bc90c6a23f">tvm::runtime</a>
+, <a class="el" href="namespacetvm_1_1te.html#a5d186948df24bd18a8aef7eee3b37727">tvm::te</a>
 </li>
 <li>operator&gt;()
-: <a class="el" href="namespacetvm.html#a7e2181bca182f90533ec35537714d09d">tvm</a>
-, <a class="el" href="namespacetvm_1_1runtime.html#a031e6c8e64cd9db11754355e3250ab4c">tvm::runtime</a>
+: <a class="el" href="namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041">tvm</a>
+, <a class="el" href="namespacetvm_1_1runtime.html#a2001f8f5205170a33ea1b9db314549c9">tvm::runtime</a>
 , <a class="el" href="namespacetvm_1_1te.html#a74074c1b06a426adb0f300944b8c4e88">tvm::te</a>
 </li>
 <li>operator&gt;=()
-: <a class="el" href="namespacetvm.html#a7a94a354cd62137652e09fa887a96100">tvm</a>
-, <a class="el" href="namespacetvm_1_1runtime.html#a8f8694e651078c6df7c5c26898ee9f14">tvm::runtime</a>
+: <a class="el" href="namespacetvm.html#ac194836fc11a8ba34e44738da17fd116">tvm</a>
+, <a class="el" href="namespacetvm_1_1runtime.html#a944029d40e689e4d2acab53ce36a5c99">tvm::runtime</a>
 , <a class="el" href="namespacetvm_1_1te.html#a54c35df3fc069cb65ad6e28fa6b35109">tvm::te</a>
 </li>
 <li>operator&gt;&gt;()
-: <a class="el" href="namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41">tvm</a>
-, <a class="el" href="namespacetvm_1_1te.html#a4a8524467a57ae005654a3f0cb816e3f">tvm::te</a>
-, <a class="el" href="namespacetvm_1_1topi.html#a27d3173a662930df8ab27f3f75ebfa4c">tvm::topi</a>
+: <a class="el" href="namespacetvm.html#abccc3d3e96ee608022b148ffa05034eb">tvm</a>
+, <a class="el" href="namespacetvm_1_1te.html#a9d3d9a057d5f1a36277ac4005f38bafa">tvm::te</a>
+, <a class="el" href="namespacetvm_1_1topi.html#a8feb934fc60a783af0509f7a3811c27d">tvm::topi</a>
 </li>
 <li>operator^()
-: <a class="el" href="namespacetvm.html#a6f638564e5e4d1023096523800f2579e">tvm</a>
+: <a class="el" href="namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f">tvm</a>
 , <a class="el" href="namespacetvm_1_1topi.html#abef6b4a192138a38c651c2c347e60263">tvm::topi</a>
 </li>
 <li>operator|()
 : <a class="el" href="namespacetvm.html#a7c7fc3c45e6f6b52b2a1064deabd0797">tvm</a>
-, <a class="el" href="namespacetvm_1_1topi.html#a0e3d0c113031f4b209febd097e426e06">tvm::topi</a>
+, <a class="el" href="namespacetvm_1_1topi.html#a5fed408670c5215cb416f427bdefc512">tvm::topi</a>
 </li>
 <li>operator||()
-: <a class="el" href="namespacetvm.html#ae4ef6ceffc5778d734c2ddfc72020d60">tvm</a>
+: <a class="el" href="namespacetvm.html#a002710a4652156a57495e10a09b5d002">tvm</a>
 , <a class="el" href="namespacetvm_1_1te.html#a1619810ecdc1c9b051522a4313a2c24e">tvm::te</a>
-, <a class="el" href="namespacetvm_1_1topi.html#aed48bd10491c0ba13a63b3ebb1bbd8fb">tvm::topi</a>
+, <a class="el" href="namespacetvm_1_1topi.html#ae5a2628177fa9009024a6f540c41b799">tvm::topi</a>
 </li>
 <li>operator~()
 : <a class="el" href="namespacetvm.html#a354b9954ff25dd819a51d856fdd38827">tvm</a>
diff --git a/docs/reference/api/doxygen/namespacemembers_func_p.html b/docs/reference/api/doxygen/namespacemembers_func_p.html
index 7c33a5248..803915cb0 100644
--- a/docs/reference/api/doxygen/namespacemembers_func_p.html
+++ b/docs/reference/api/doxygen/namespacemembers_func_p.html
@@ -67,12 +67,12 @@ $(function() {
 <li>PackImportsToLLVM()
 : <a class="el" href="namespacetvm_1_1codegen.html#ab2cd2a65bac4b26427a8ca0abe4e0bd6">tvm::codegen</a>
 </li>
-<li>pad()
-: <a class="el" href="namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5">tvm::topi</a>
-</li>
 <li>Pad()
 : <a class="el" href="namespacetvm_1_1topi.html#a97c798d0a0ec20a95d351618b83d5121">tvm::topi</a>
 </li>
+<li>pad()
+: <a class="el" href="namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5">tvm::topi</a>
+</li>
 <li>parallel_for()
 : <a class="el" href="namespacetvm_1_1support.html#a8bf1225e8bb1db575578ca2d645fb23c">tvm::support</a>
 </li>
diff --git a/docs/reference/api/doxygen/namespacemembers_o.html b/docs/reference/api/doxygen/namespacemembers_o.html
index 0f5da6996..1f4b6c036 100644
--- a/docs/reference/api/doxygen/namespacemembers_o.html
+++ b/docs/reference/api/doxygen/namespacemembers_o.html
@@ -75,7 +75,7 @@ $(function() {
 , <a class="el" href="namespacetvm_1_1topi.html#a69bc76d169f422bffc6e0ee84afcea87">tvm::topi</a>
 </li>
 <li>operator &amp;&amp;()
-: <a class="el" href="namespacetvm.html#a242b37bc39f3fc56d29e36f916cc1483">tvm</a>
+: <a class="el" href="namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8">tvm</a>
 , <a class="el" href="namespacetvm_1_1topi.html#aa836ff50a0222fbfa248bd46402651b8">tvm::topi</a>
 </li>
 <li>operator!()
@@ -85,7 +85,7 @@ $(function() {
 <li>operator!=()
 : <a class="el" href="namespacetvm.html#a03983cf66713724c138f9697bb8e0e97">tvm</a>
 , <a class="el" href="namespacetvm_1_1runtime.html#a7d584f14566754b91a0e7f415b83fbda">tvm::runtime</a>
-, <a class="el" href="namespacetvm_1_1te.html#a2ec6eaac719b6db0b90cb3bc2111c627">tvm::te</a>
+, <a class="el" href="namespacetvm_1_1te.html#a9aa8f8b632906748d97cf155d9c41449">tvm::te</a>
 </li>
 <li>operator%()
 : <a class="el" href="namespacetvm.html#ab25738e50b37cd07b2d171ca74ba9321">tvm</a>
@@ -139,39 +139,39 @@ $(function() {
 <li>operator&lt;=()
 : <a class="el" href="namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d">tvm</a>
 , <a class="el" href="namespacetvm_1_1runtime.html#a92428efae022d4982b2644f8960d4386">tvm::runtime</a>
-, <a class="el" href="namespacetvm_1_1te.html#a9049756f490d96b37d24fb4a4d019d6e">tvm::te</a>
+, <a class="el" href="namespacetvm_1_1te.html#a155868a829cdec5e04c00fee9fd6b8ab">tvm::te</a>
 </li>
 <li>operator==()
-: <a class="el" href="namespacetvm.html#a04a0a3acfb061ec692ba8fc24e9eacba">tvm</a>
-, <a class="el" href="namespacetvm_1_1runtime.html#a81dae8fb1a2fa349cfb272525294f954">tvm::runtime</a>
-, <a class="el" href="namespacetvm_1_1te.html#a640a91e87c3d3a55c5df248c1a194b90">tvm::te</a>
+: <a class="el" href="namespacetvm.html#a8b12d0bb7c343e149ae0631a2577547b">tvm</a>
+, <a class="el" href="namespacetvm_1_1runtime.html#aba04626a0c1e717679d673bc90c6a23f">tvm::runtime</a>
+, <a class="el" href="namespacetvm_1_1te.html#a5d186948df24bd18a8aef7eee3b37727">tvm::te</a>
 </li>
 <li>operator&gt;()
-: <a class="el" href="namespacetvm.html#acc92dcd3d81981e983ddf05347bc9371">tvm</a>
-, <a class="el" href="namespacetvm_1_1runtime.html#a031e6c8e64cd9db11754355e3250ab4c">tvm::runtime</a>
-, <a class="el" href="namespacetvm_1_1te.html#af05f53104e6686e271783712280e4005">tvm::te</a>
+: <a class="el" href="namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041">tvm</a>
+, <a class="el" href="namespacetvm_1_1runtime.html#af7996be0c57184e8ff9e655d0d055edc">tvm::runtime</a>
+, <a class="el" href="namespacetvm_1_1te.html#a74074c1b06a426adb0f300944b8c4e88">tvm::te</a>
 </li>
 <li>operator&gt;=()
-: <a class="el" href="namespacetvm.html#ac194836fc11a8ba34e44738da17fd116">tvm</a>
-, <a class="el" href="namespacetvm_1_1runtime.html#a8f8694e651078c6df7c5c26898ee9f14">tvm::runtime</a>
+: <a class="el" href="namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf">tvm</a>
+, <a class="el" href="namespacetvm_1_1runtime.html#a944029d40e689e4d2acab53ce36a5c99">tvm::runtime</a>
 , <a class="el" href="namespacetvm_1_1te.html#a5cbaee6b481ab46d55c17206e2487eed">tvm::te</a>
 </li>
 <li>operator&gt;&gt;()
 : <a class="el" href="namespacetvm.html#a550c2a251b2a6fd2a72172fe3db75d40">tvm</a>
-, <a class="el" href="namespacetvm_1_1te.html#a8705a88b943011532ff4c94c4b06c213">tvm::te</a>
-, <a class="el" href="namespacetvm_1_1topi.html#a27d3173a662930df8ab27f3f75ebfa4c">tvm::topi</a>
+, <a class="el" href="namespacetvm_1_1te.html#a9d3d9a057d5f1a36277ac4005f38bafa">tvm::te</a>
+, <a class="el" href="namespacetvm_1_1topi.html#a8feb934fc60a783af0509f7a3811c27d">tvm::topi</a>
 </li>
 <li>operator^()
-: <a class="el" href="namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f">tvm</a>
-, <a class="el" href="namespacetvm_1_1topi.html#abef6b4a192138a38c651c2c347e60263">tvm::topi</a>
+: <a class="el" href="namespacetvm.html#a6f638564e5e4d1023096523800f2579e">tvm</a>
+, <a class="el" href="namespacetvm_1_1topi.html#a32379f4c2a17152ea26ea90967889847">tvm::topi</a>
 </li>
 <li>operator|()
-: <a class="el" href="namespacetvm.html#a7c7fc3c45e6f6b52b2a1064deabd0797">tvm</a>
+: <a class="el" href="namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d">tvm</a>
 , <a class="el" href="namespacetvm_1_1topi.html#a5fed408670c5215cb416f427bdefc512">tvm::topi</a>
 </li>
 <li>operator||()
 : <a class="el" href="namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6">tvm</a>
-, <a class="el" href="namespacetvm_1_1te.html#a1619810ecdc1c9b051522a4313a2c24e">tvm::te</a>
+, <a class="el" href="namespacetvm_1_1te.html#a1fd6b8f8380a489cfcd806952c2aae42">tvm::te</a>
 , <a class="el" href="namespacetvm_1_1topi.html#a2c5c3be4c5bc745dec9d3d9fa340a69e">tvm::topi</a>
 </li>
 <li>operator~()
diff --git a/docs/reference/api/doxygen/namespacemembers_p.html b/docs/reference/api/doxygen/namespacemembers_p.html
index eb3b32fd5..33cb466d5 100644
--- a/docs/reference/api/doxygen/namespacemembers_p.html
+++ b/docs/reference/api/doxygen/namespacemembers_p.html
@@ -67,12 +67,12 @@ $(function() {
 <li>PackImportsToLLVM()
 : <a class="el" href="namespacetvm_1_1codegen.html#ab2cd2a65bac4b26427a8ca0abe4e0bd6">tvm::codegen</a>
 </li>
-<li>pad()
-: <a class="el" href="namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5">tvm::topi</a>
-</li>
 <li>Pad()
 : <a class="el" href="namespacetvm_1_1topi.html#a97c798d0a0ec20a95d351618b83d5121">tvm::topi</a>
 </li>
+<li>pad()
+: <a class="el" href="namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5">tvm::topi</a>
+</li>
 <li>parallel_for()
 : <a class="el" href="namespacetvm_1_1support.html#a8bf1225e8bb1db575578ca2d645fb23c">tvm::support</a>
 </li>
diff --git a/docs/reference/api/doxygen/namespacetvm.html b/docs/reference/api/doxygen/namespacetvm.html
index 9c6db559d..005955acb 100644
--- a/docs/reference/api/doxygen/namespacetvm.html
+++ b/docs/reference/api/doxygen/namespacetvm.html
@@ -661,6 +661,66 @@ Functions</h2></td></tr>
 <tr class="separator:a7e2bc626db8be997b1562c79df3d9e11"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a69a0e3f559d3a3b98d42701117d93ed0"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1DiagnosticRenderer.html">DiagnosticRenderer</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a69a0e3f559d3a3b98d42701117d93ed0">TerminalRenderer</a> (std::ostream &amp;ostream)</td></tr>
 <tr class="separator:a69a0e3f559d3a3b98d42701117d93ed0"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:af246f441d4ac21b110185b77240b2dcc"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#af246f441d4ac21b110185b77240b2dcc">operator+</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:af246f441d4ac21b110185b77240b2dcc"><td class="mdescLeft">&#160;</td><td class="mdescRight">add operator  <a href="#af246f441d4ac21b110185b77240b2dcc">More...</a><br /></td></tr>
+<tr class="separator:af246f441d4ac21b110185b77240b2dcc"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:abde487c0197942c4ebb1b47277b89dac"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abde487c0197942c4ebb1b47277b89dac">operator-</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:abde487c0197942c4ebb1b47277b89dac"><td class="mdescLeft">&#160;</td><td class="mdescRight">subtraction operator  <a href="#abde487c0197942c4ebb1b47277b89dac">More...</a><br /></td></tr>
+<tr class="separator:abde487c0197942c4ebb1b47277b89dac"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:abc417454badf61b154d6a8d87cd8f171"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abc417454badf61b154d6a8d87cd8f171">operator-</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a)</td></tr>
+<tr class="memdesc:abc417454badf61b154d6a8d87cd8f171"><td class="mdescLeft">&#160;</td><td class="mdescRight">negation.  <a href="#abc417454badf61b154d6a8d87cd8f171">More...</a><br /></td></tr>
+<tr class="separator:abc417454badf61b154d6a8d87cd8f171"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a5c5034de2993b9130b7bd9d593a11bb5"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a5c5034de2993b9130b7bd9d593a11bb5">operator*</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a5c5034de2993b9130b7bd9d593a11bb5"><td class="mdescLeft">&#160;</td><td class="mdescRight">multiplication operator  <a href="#a5c5034de2993b9130b7bd9d593a11bb5">More...</a><br /></td></tr>
+<tr class="separator:a5c5034de2993b9130b7bd9d593a11bb5"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a18256ba1213ce5ff3cf8037a314354b7"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a18256ba1213ce5ff3cf8037a314354b7">operator/</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a18256ba1213ce5ff3cf8037a314354b7"><td class="mdescLeft">&#160;</td><td class="mdescRight">division operator  <a href="#a18256ba1213ce5ff3cf8037a314354b7">More...</a><br /></td></tr>
+<tr class="separator:a18256ba1213ce5ff3cf8037a314354b7"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:af682776c3609284f1bc3ea436e21a67a"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#af682776c3609284f1bc3ea436e21a67a">operator&lt;&lt;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:af682776c3609284f1bc3ea436e21a67a"><td class="mdescLeft">&#160;</td><td class="mdescRight">left shift operator  <a href="#af682776c3609284f1bc3ea436e21a67a">More...</a><br /></td></tr>
+<tr class="separator:af682776c3609284f1bc3ea436e21a67a"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41">operator&gt;&gt;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="mdescLeft">&#160;</td><td class="mdescRight">right shift operator  <a href="#a1ce1eb32fc9d76ebe5a6b8d185024d41">More...</a><br /></td></tr>
+<tr class="separator:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ad93d00f7b080dc3f905f5c34c170a041"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041">operator&gt;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:ad93d00f7b080dc3f905f5c34c170a041"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater  <a href="#ad93d00f7b080dc3f905f5c34c170a041">More...</a><br /></td></tr>
+<tr class="separator:ad93d00f7b080dc3f905f5c34c170a041"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a5530417da455bd46f5dc55f27d69bcdf"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf">operator&gt;=</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a5530417da455bd46f5dc55f27d69bcdf"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater_equal  <a href="#a5530417da455bd46f5dc55f27d69bcdf">More...</a><br /></td></tr>
+<tr class="separator:a5530417da455bd46f5dc55f27d69bcdf"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a1f98476c3a413f6cdfc7b7e490f3221b">operator&lt;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="mdescLeft">&#160;</td><td class="mdescRight">less  <a href="#a1f98476c3a413f6cdfc7b7e490f3221b">More...</a><br /></td></tr>
+<tr class="separator:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a598f8139c469abc4066dbdd0a0a0845d"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d">operator&lt;=</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a598f8139c469abc4066dbdd0a0a0845d"><td class="mdescLeft">&#160;</td><td class="mdescRight">less_equal  <a href="#a598f8139c469abc4066dbdd0a0a0845d">More...</a><br /></td></tr>
+<tr class="separator:a598f8139c469abc4066dbdd0a0a0845d"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a2ea3b45c96d3980227e418f7158ce5c3"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a2ea3b45c96d3980227e418f7158ce5c3">operator==</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a2ea3b45c96d3980227e418f7158ce5c3"><td class="mdescLeft">&#160;</td><td class="mdescRight">equal  <a href="#a2ea3b45c96d3980227e418f7158ce5c3">More...</a><br /></td></tr>
+<tr class="separator:a2ea3b45c96d3980227e418f7158ce5c3"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a03983cf66713724c138f9697bb8e0e97"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a03983cf66713724c138f9697bb8e0e97">operator!=</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a03983cf66713724c138f9697bb8e0e97"><td class="mdescLeft">&#160;</td><td class="mdescRight">not_equal  <a href="#a03983cf66713724c138f9697bb8e0e97">More...</a><br /></td></tr>
+<tr class="separator:a03983cf66713724c138f9697bb8e0e97"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a7579d33e0aac9600dec46264a3f1edb8"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8">operator &amp;&amp;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a7579d33e0aac9600dec46264a3f1edb8"><td class="mdescLeft">&#160;</td><td class="mdescRight">and  <a href="#a7579d33e0aac9600dec46264a3f1edb8">More...</a><br /></td></tr>
+<tr class="separator:a7579d33e0aac9600dec46264a3f1edb8"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6">operator||</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="mdescLeft">&#160;</td><td class="mdescRight">or  <a href="#ac3bf2ef3556c995846dddcd84e5db8a6">More...</a><br /></td></tr>
+<tr class="separator:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:ab354bf1270121abea71fade83f13b0b0"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ab354bf1270121abea71fade83f13b0b0">operator!</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a)</td></tr>
+<tr class="memdesc:ab354bf1270121abea71fade83f13b0b0"><td class="mdescLeft">&#160;</td><td class="mdescRight">not  <a href="#ab354bf1270121abea71fade83f13b0b0">More...</a><br /></td></tr>
+<tr class="separator:ab354bf1270121abea71fade83f13b0b0"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a2a1269a38e7e3621eb2906a47157106a"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a2a1269a38e7e3621eb2906a47157106a">operator &amp;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a2a1269a38e7e3621eb2906a47157106a"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise and of two values  <a href="#a2a1269a38e7e3621eb2906a47157106a">More...</a><br /></td></tr>
+<tr class="separator:a2a1269a38e7e3621eb2906a47157106a"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a236d9aae385e6697874f75e4c8a69f8d"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d">operator|</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:a236d9aae385e6697874f75e4c8a69f8d"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise or of two values  <a href="#a236d9aae385e6697874f75e4c8a69f8d">More...</a><br /></td></tr>
+<tr class="separator:a236d9aae385e6697874f75e4c8a69f8d"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f">operator^</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
+<tr class="memdesc:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise xor of two values  <a href="#abd7d1b3232218b25e2e0cf6ef699a65f">More...</a><br /></td></tr>
+<tr class="separator:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="memSeparator" colspan="2">&#160;</td></tr>
+<tr class="memitem:a354b9954ff25dd819a51d856fdd38827"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a354b9954ff25dd819a51d856fdd38827">operator~</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a)</td></tr>
+<tr class="memdesc:a354b9954ff25dd819a51d856fdd38827"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise negation of two values  <a href="#a354b9954ff25dd819a51d856fdd38827">More...</a><br /></td></tr>
+<tr class="separator:a354b9954ff25dd819a51d856fdd38827"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a002710a4652156a57495e10a09b5d002"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1Bool.html">Bool</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a002710a4652156a57495e10a09b5d002">operator||</a> (const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;a, bool b)</td></tr>
 <tr class="separator:a002710a4652156a57495e10a09b5d002"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a4c8c1c1c248859ce0d20f614e18a9524"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1Bool.html">Bool</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a4c8c1c1c248859ce0d20f614e18a9524">operator||</a> (bool a, const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;b)</td></tr>
@@ -747,96 +807,48 @@ Functions</h2></td></tr>
 <tr class="memitem:ae2794f261657780b2af4208b95d9cfcb"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ae2794f261657780b2af4208b95d9cfcb">add</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a class="e [...]
 <tr class="memdesc:ae2794f261657780b2af4208b95d9cfcb"><td class="mdescLeft">&#160;</td><td class="mdescRight">add operator  <a href="#ae2794f261657780b2af4208b95d9cfcb">More...</a><br /></td></tr>
 <tr class="separator:ae2794f261657780b2af4208b95d9cfcb"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:af246f441d4ac21b110185b77240b2dcc"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#af246f441d4ac21b110185b77240b2dcc">operator+</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:af246f441d4ac21b110185b77240b2dcc"><td class="mdescLeft">&#160;</td><td class="mdescRight">add operator  <a href="#af246f441d4ac21b110185b77240b2dcc">More...</a><br /></td></tr>
-<tr class="separator:af246f441d4ac21b110185b77240b2dcc"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a89da021f5e3e2e911acfd96f973e5bc3"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a89da021f5e3e2e911acfd96f973e5bc3">sub</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a class="e [...]
 <tr class="memdesc:a89da021f5e3e2e911acfd96f973e5bc3"><td class="mdescLeft">&#160;</td><td class="mdescRight">subtraction operator  <a href="#a89da021f5e3e2e911acfd96f973e5bc3">More...</a><br /></td></tr>
 <tr class="separator:a89da021f5e3e2e911acfd96f973e5bc3"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:abde487c0197942c4ebb1b47277b89dac"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abde487c0197942c4ebb1b47277b89dac">operator-</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:abde487c0197942c4ebb1b47277b89dac"><td class="mdescLeft">&#160;</td><td class="mdescRight">subtraction operator  <a href="#abde487c0197942c4ebb1b47277b89dac">More...</a><br /></td></tr>
-<tr class="separator:abde487c0197942c4ebb1b47277b89dac"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a5cd85b156fb31f75f91c8a5c012f8a66"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a5cd85b156fb31f75f91c8a5c012f8a66">neg</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a class="el" href="classtvm_1_1Span.html">Span</a>())</td></tr>
 <tr class="memdesc:a5cd85b156fb31f75f91c8a5c012f8a66"><td class="mdescLeft">&#160;</td><td class="mdescRight">negation.  <a href="#a5cd85b156fb31f75f91c8a5c012f8a66">More...</a><br /></td></tr>
 <tr class="separator:a5cd85b156fb31f75f91c8a5c012f8a66"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:abc417454badf61b154d6a8d87cd8f171"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abc417454badf61b154d6a8d87cd8f171">operator-</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a)</td></tr>
-<tr class="memdesc:abc417454badf61b154d6a8d87cd8f171"><td class="mdescLeft">&#160;</td><td class="mdescRight">negation.  <a href="#abc417454badf61b154d6a8d87cd8f171">More...</a><br /></td></tr>
-<tr class="separator:abc417454badf61b154d6a8d87cd8f171"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:aaa28e92b677086d89ebfb77204bf92a2"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#aaa28e92b677086d89ebfb77204bf92a2">mul</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a class="e [...]
 <tr class="memdesc:aaa28e92b677086d89ebfb77204bf92a2"><td class="mdescLeft">&#160;</td><td class="mdescRight">multiplication operator  <a href="#aaa28e92b677086d89ebfb77204bf92a2">More...</a><br /></td></tr>
 <tr class="separator:aaa28e92b677086d89ebfb77204bf92a2"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a5c5034de2993b9130b7bd9d593a11bb5"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a5c5034de2993b9130b7bd9d593a11bb5">operator*</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a5c5034de2993b9130b7bd9d593a11bb5"><td class="mdescLeft">&#160;</td><td class="mdescRight">multiplication operator  <a href="#a5c5034de2993b9130b7bd9d593a11bb5">More...</a><br /></td></tr>
-<tr class="separator:a5c5034de2993b9130b7bd9d593a11bb5"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a18256ba1213ce5ff3cf8037a314354b7"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a18256ba1213ce5ff3cf8037a314354b7">operator/</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a18256ba1213ce5ff3cf8037a314354b7"><td class="mdescLeft">&#160;</td><td class="mdescRight">division operator  <a href="#a18256ba1213ce5ff3cf8037a314354b7">More...</a><br /></td></tr>
-<tr class="separator:a18256ba1213ce5ff3cf8037a314354b7"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ad4fceb4266c6e7644fa373eacf73359f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ad4fceb4266c6e7644fa373eacf73359f">left_shift</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a c [...]
 <tr class="memdesc:ad4fceb4266c6e7644fa373eacf73359f"><td class="mdescLeft">&#160;</td><td class="mdescRight">left shift operator  <a href="#ad4fceb4266c6e7644fa373eacf73359f">More...</a><br /></td></tr>
 <tr class="separator:ad4fceb4266c6e7644fa373eacf73359f"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:af682776c3609284f1bc3ea436e21a67a"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#af682776c3609284f1bc3ea436e21a67a">operator&lt;&lt;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:af682776c3609284f1bc3ea436e21a67a"><td class="mdescLeft">&#160;</td><td class="mdescRight">left shift operator  <a href="#af682776c3609284f1bc3ea436e21a67a">More...</a><br /></td></tr>
-<tr class="separator:af682776c3609284f1bc3ea436e21a67a"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ae8ecc0382685a855187bede0c97d93e6"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ae8ecc0382685a855187bede0c97d93e6">right_shift</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a  [...]
 <tr class="memdesc:ae8ecc0382685a855187bede0c97d93e6"><td class="mdescLeft">&#160;</td><td class="mdescRight">right shift operator  <a href="#ae8ecc0382685a855187bede0c97d93e6">More...</a><br /></td></tr>
 <tr class="separator:ae8ecc0382685a855187bede0c97d93e6"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41">operator&gt;&gt;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="mdescLeft">&#160;</td><td class="mdescRight">right shift operator  <a href="#a1ce1eb32fc9d76ebe5a6b8d185024d41">More...</a><br /></td></tr>
-<tr class="separator:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a7ffc1cdb3a52b680e4b509395c9a252d"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a7ffc1cdb3a52b680e4b509395c9a252d">greater</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a clas [...]
 <tr class="memdesc:a7ffc1cdb3a52b680e4b509395c9a252d"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater  <a href="#a7ffc1cdb3a52b680e4b509395c9a252d">More...</a><br /></td></tr>
 <tr class="separator:a7ffc1cdb3a52b680e4b509395c9a252d"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ad93d00f7b080dc3f905f5c34c170a041"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041">operator&gt;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:ad93d00f7b080dc3f905f5c34c170a041"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater  <a href="#ad93d00f7b080dc3f905f5c34c170a041">More...</a><br /></td></tr>
-<tr class="separator:ad93d00f7b080dc3f905f5c34c170a041"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ab1b704bb5a31b602869fb5c94a56f468"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ab1b704bb5a31b602869fb5c94a56f468">greater_equal</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=< [...]
 <tr class="memdesc:ab1b704bb5a31b602869fb5c94a56f468"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater_equal  <a href="#ab1b704bb5a31b602869fb5c94a56f468">More...</a><br /></td></tr>
 <tr class="separator:ab1b704bb5a31b602869fb5c94a56f468"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a5530417da455bd46f5dc55f27d69bcdf"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf">operator&gt;=</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a5530417da455bd46f5dc55f27d69bcdf"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater_equal  <a href="#a5530417da455bd46f5dc55f27d69bcdf">More...</a><br /></td></tr>
-<tr class="separator:a5530417da455bd46f5dc55f27d69bcdf"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a52fa1dc57423a077eb098960162e7b85"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a52fa1dc57423a077eb098960162e7b85">less</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a class=" [...]
 <tr class="memdesc:a52fa1dc57423a077eb098960162e7b85"><td class="mdescLeft">&#160;</td><td class="mdescRight">less  <a href="#a52fa1dc57423a077eb098960162e7b85">More...</a><br /></td></tr>
 <tr class="separator:a52fa1dc57423a077eb098960162e7b85"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a1f98476c3a413f6cdfc7b7e490f3221b">operator&lt;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="mdescLeft">&#160;</td><td class="mdescRight">less  <a href="#a1f98476c3a413f6cdfc7b7e490f3221b">More...</a><br /></td></tr>
-<tr class="separator:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a6dfe80d16a7b4f551c87a8901d366d08"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a6dfe80d16a7b4f551c87a8901d366d08">less_equal</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a c [...]
 <tr class="memdesc:a6dfe80d16a7b4f551c87a8901d366d08"><td class="mdescLeft">&#160;</td><td class="mdescRight">less_equal  <a href="#a6dfe80d16a7b4f551c87a8901d366d08">More...</a><br /></td></tr>
 <tr class="separator:a6dfe80d16a7b4f551c87a8901d366d08"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a598f8139c469abc4066dbdd0a0a0845d"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d">operator&lt;=</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a598f8139c469abc4066dbdd0a0a0845d"><td class="mdescLeft">&#160;</td><td class="mdescRight">less_equal  <a href="#a598f8139c469abc4066dbdd0a0a0845d">More...</a><br /></td></tr>
-<tr class="separator:a598f8139c469abc4066dbdd0a0a0845d"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a1c4f14382b85bcfa57d9a3460db2354a"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a1c4f14382b85bcfa57d9a3460db2354a">equal</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a class= [...]
 <tr class="memdesc:a1c4f14382b85bcfa57d9a3460db2354a"><td class="mdescLeft">&#160;</td><td class="mdescRight">equal  <a href="#a1c4f14382b85bcfa57d9a3460db2354a">More...</a><br /></td></tr>
 <tr class="separator:a1c4f14382b85bcfa57d9a3460db2354a"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a2ea3b45c96d3980227e418f7158ce5c3"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a2ea3b45c96d3980227e418f7158ce5c3">operator==</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a2ea3b45c96d3980227e418f7158ce5c3"><td class="mdescLeft">&#160;</td><td class="mdescRight">equal  <a href="#a2ea3b45c96d3980227e418f7158ce5c3">More...</a><br /></td></tr>
-<tr class="separator:a2ea3b45c96d3980227e418f7158ce5c3"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ac3932d85fd31819eae6a80841296af51"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ac3932d85fd31819eae6a80841296af51">not_equal</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a cl [...]
 <tr class="memdesc:ac3932d85fd31819eae6a80841296af51"><td class="mdescLeft">&#160;</td><td class="mdescRight">not_equal  <a href="#ac3932d85fd31819eae6a80841296af51">More...</a><br /></td></tr>
 <tr class="separator:ac3932d85fd31819eae6a80841296af51"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a03983cf66713724c138f9697bb8e0e97"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a03983cf66713724c138f9697bb8e0e97">operator!=</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a03983cf66713724c138f9697bb8e0e97"><td class="mdescLeft">&#160;</td><td class="mdescRight">not_equal  <a href="#a03983cf66713724c138f9697bb8e0e97">More...</a><br /></td></tr>
-<tr class="separator:a03983cf66713724c138f9697bb8e0e97"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a27d5567b95675d383c4675fdcd85346c"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a27d5567b95675d383c4675fdcd85346c">logical_and</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a  [...]
 <tr class="memdesc:a27d5567b95675d383c4675fdcd85346c"><td class="mdescLeft">&#160;</td><td class="mdescRight">and  <a href="#a27d5567b95675d383c4675fdcd85346c">More...</a><br /></td></tr>
 <tr class="separator:a27d5567b95675d383c4675fdcd85346c"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a7579d33e0aac9600dec46264a3f1edb8"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8">operator &amp;&amp;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a7579d33e0aac9600dec46264a3f1edb8"><td class="mdescLeft">&#160;</td><td class="mdescRight">and  <a href="#a7579d33e0aac9600dec46264a3f1edb8">More...</a><br /></td></tr>
-<tr class="separator:a7579d33e0aac9600dec46264a3f1edb8"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a4509dece1af96338cc25097855fcecd7"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a4509dece1af96338cc25097855fcecd7">logical_or</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a c [...]
 <tr class="memdesc:a4509dece1af96338cc25097855fcecd7"><td class="mdescLeft">&#160;</td><td class="mdescRight">or  <a href="#a4509dece1af96338cc25097855fcecd7">More...</a><br /></td></tr>
 <tr class="separator:a4509dece1af96338cc25097855fcecd7"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6">operator||</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="mdescLeft">&#160;</td><td class="mdescRight">or  <a href="#ac3bf2ef3556c995846dddcd84e5db8a6">More...</a><br /></td></tr>
-<tr class="separator:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a62955df1df48917116efe39d4cd18fec"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a62955df1df48917116efe39d4cd18fec">logical_not</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a class="el" href="classtvm_1_1Span.html">Span</a>())</td></tr>
 <tr class="memdesc:a62955df1df48917116efe39d4cd18fec"><td class="mdescLeft">&#160;</td><td class="mdescRight">not  <a href="#a62955df1df48917116efe39d4cd18fec">More...</a><br /></td></tr>
 <tr class="separator:a62955df1df48917116efe39d4cd18fec"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ab354bf1270121abea71fade83f13b0b0"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ab354bf1270121abea71fade83f13b0b0">operator!</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a)</td></tr>
-<tr class="memdesc:ab354bf1270121abea71fade83f13b0b0"><td class="mdescLeft">&#160;</td><td class="mdescRight">not  <a href="#ab354bf1270121abea71fade83f13b0b0">More...</a><br /></td></tr>
-<tr class="separator:ab354bf1270121abea71fade83f13b0b0"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a16f9cd9219b505e2cc05c5a7558ac61f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a16f9cd9219b505e2cc05c5a7558ac61f">div</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a class="e [...]
 <tr class="memdesc:a16f9cd9219b505e2cc05c5a7558ac61f"><td class="mdescLeft">&#160;</td><td class="mdescRight">compute division in C semantics.  <a href="#a16f9cd9219b505e2cc05c5a7558ac61f">More...</a><br /></td></tr>
 <tr class="separator:a16f9cd9219b505e2cc05c5a7558ac61f"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -873,27 +885,15 @@ Functions</h2></td></tr>
 <tr class="memitem:acebb0c446b76d5a28c3b1b55f827c86e"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#acebb0c446b76d5a28c3b1b55f827c86e">bitwise_and</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a  [...]
 <tr class="memdesc:acebb0c446b76d5a28c3b1b55f827c86e"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise and of two values  <a href="#acebb0c446b76d5a28c3b1b55f827c86e">More...</a><br /></td></tr>
 <tr class="separator:acebb0c446b76d5a28c3b1b55f827c86e"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a2a1269a38e7e3621eb2906a47157106a"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a2a1269a38e7e3621eb2906a47157106a">operator &amp;</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a2a1269a38e7e3621eb2906a47157106a"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise and of two values  <a href="#a2a1269a38e7e3621eb2906a47157106a">More...</a><br /></td></tr>
-<tr class="separator:a2a1269a38e7e3621eb2906a47157106a"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:aee8d9c7084d8df28bf6f05e0851a557f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#aee8d9c7084d8df28bf6f05e0851a557f">bitwise_or</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a c [...]
 <tr class="memdesc:aee8d9c7084d8df28bf6f05e0851a557f"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise or of two values  <a href="#aee8d9c7084d8df28bf6f05e0851a557f">More...</a><br /></td></tr>
 <tr class="separator:aee8d9c7084d8df28bf6f05e0851a557f"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a236d9aae385e6697874f75e4c8a69f8d"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d">operator|</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:a236d9aae385e6697874f75e4c8a69f8d"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise or of two values  <a href="#a236d9aae385e6697874f75e4c8a69f8d">More...</a><br /></td></tr>
-<tr class="separator:a236d9aae385e6697874f75e4c8a69f8d"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a6c238cafec94d03b8e70688d4cf82642"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a6c238cafec94d03b8e70688d4cf82642">bitwise_xor</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a  [...]
 <tr class="memdesc:a6c238cafec94d03b8e70688d4cf82642"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise xor of two values  <a href="#a6c238cafec94d03b8e70688d4cf82642">More...</a><br /></td></tr>
 <tr class="separator:a6c238cafec94d03b8e70688d4cf82642"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f">operator^</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> b)</td></tr>
-<tr class="memdesc:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise xor of two values  <a href="#abd7d1b3232218b25e2e0cf6ef699a65f">More...</a><br /></td></tr>
-<tr class="separator:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a3f6d8fba545c2944efc83b57e6190459"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a3f6d8fba545c2944efc83b57e6190459">bitwise_neg</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a, <a class="el" href="classtvm_1_1Span.html">Span</a> span=<a class="el" href="classtvm_1_1Span.html">Span</a>())</td></tr>
 <tr class="memdesc:a3f6d8fba545c2944efc83b57e6190459"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise negation of two values  <a href="#a3f6d8fba545c2944efc83b57e6190459">More...</a><br /></td></tr>
 <tr class="separator:a3f6d8fba545c2944efc83b57e6190459"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a354b9954ff25dd819a51d856fdd38827"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a354b9954ff25dd819a51d856fdd38827">operator~</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> a)</td></tr>
-<tr class="memdesc:a354b9954ff25dd819a51d856fdd38827"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise negation of two values  <a href="#a354b9954ff25dd819a51d856fdd38827">More...</a><br /></td></tr>
-<tr class="separator:a354b9954ff25dd819a51d856fdd38827"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a353217978feabae3575560bf1586885f"><td class="memItemLeft" align="right" valign="top"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a353217978feabae3575560bf1586885f">if_then_else</a> (<a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> cond, <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> true_value, <a class="el" href="classtvm_1_1PrimExpr.html"> [...]
 <tr class="memdesc:a353217978feabae3575560bf1586885f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Conditional expression.  <a href="#a353217978feabae3575560bf1586885f">More...</a><br /></td></tr>
 <tr class="separator:a353217978feabae3575560bf1586885f"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -3382,8 +3382,8 @@ template&lt;typename TAttrs &gt; </div>
 
 </div>
 </div>
-<a id="a0d9bb599eb28a6ca9d096247ff4d7fba"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a0d9bb599eb28a6ca9d096247ff4d7fba">&#9670;&nbsp;</a></span>div() <span class="overload">[4/6]</span></h2>
+<a id="a421c6836f0e87cd662320a8f6c23d452"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a421c6836f0e87cd662320a8f6c23d452">&#9670;&nbsp;</a></span>div() <span class="overload">[4/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -3394,13 +3394,13 @@ template&lt;typename TAttrs &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::div </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -3424,8 +3424,8 @@ template&lt;typename TAttrs &gt; </div>
 
 </div>
 </div>
-<a id="a421c6836f0e87cd662320a8f6c23d452"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a421c6836f0e87cd662320a8f6c23d452">&#9670;&nbsp;</a></span>div() <span class="overload">[5/6]</span></h2>
+<a id="a0d9bb599eb28a6ca9d096247ff4d7fba"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a0d9bb599eb28a6ca9d096247ff4d7fba">&#9670;&nbsp;</a></span>div() <span class="overload">[5/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -3436,13 +3436,13 @@ template&lt;typename TAttrs &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::div </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -3871,8 +3871,8 @@ template&lt;typename TA &gt; </div>
 
 </div>
 </div>
-<a id="a87200564215339b900ca546678fc71a4"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a87200564215339b900ca546678fc71a4">&#9670;&nbsp;</a></span>floordiv() <span class="overload">[2/3]</span></h2>
+<a id="a435a9df348bdb72e60bfe4ce410dcc58"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a435a9df348bdb72e60bfe4ce410dcc58">&#9670;&nbsp;</a></span>floordiv() <span class="overload">[2/3]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -3883,13 +3883,13 @@ template&lt;typename TA &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::floordiv </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -3913,8 +3913,8 @@ template&lt;typename TA &gt; </div>
 
 </div>
 </div>
-<a id="a435a9df348bdb72e60bfe4ce410dcc58"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a435a9df348bdb72e60bfe4ce410dcc58">&#9670;&nbsp;</a></span>floordiv() <span class="overload">[3/3]</span></h2>
+<a id="a87200564215339b900ca546678fc71a4"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a87200564215339b900ca546678fc71a4">&#9670;&nbsp;</a></span>floordiv() <span class="overload">[3/3]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -3925,13 +3925,13 @@ template&lt;typename TA &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::floordiv </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -5021,8 +5021,8 @@ template&lt;typename TA &gt; </div>
 
 </div>
 </div>
-<a id="a78b6faac00e9a823e9bcf14e1e02f1f2"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a78b6faac00e9a823e9bcf14e1e02f1f2">&#9670;&nbsp;</a></span>indexmod() <span class="overload">[2/3]</span></h2>
+<a id="a048a29e36fa055771713480f3f4ecdf4"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a048a29e36fa055771713480f3f4ecdf4">&#9670;&nbsp;</a></span>indexmod() <span class="overload">[2/3]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -5033,13 +5033,13 @@ template&lt;typename TA &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::indexmod </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -5063,8 +5063,8 @@ template&lt;typename TA &gt; </div>
 
 </div>
 </div>
-<a id="a048a29e36fa055771713480f3f4ecdf4"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a048a29e36fa055771713480f3f4ecdf4">&#9670;&nbsp;</a></span>indexmod() <span class="overload">[3/3]</span></h2>
+<a id="a78b6faac00e9a823e9bcf14e1e02f1f2"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a78b6faac00e9a823e9bcf14e1e02f1f2">&#9670;&nbsp;</a></span>indexmod() <span class="overload">[3/3]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -5075,13 +5075,13 @@ template&lt;typename TA &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::indexmod </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -5889,8 +5889,8 @@ template&lt;typename TA &gt; </div>
 
 </div>
 </div>
-<a id="aec0ac319177760ff01be833bae8b72bf"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#aec0ac319177760ff01be833bae8b72bf">&#9670;&nbsp;</a></span>less_equal() <span class="overload">[3/6]</span></h2>
+<a id="ad4734f467b4107f0da21a510788479c1"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#ad4734f467b4107f0da21a510788479c1">&#9670;&nbsp;</a></span>less_equal() <span class="overload">[3/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -5901,13 +5901,13 @@ template&lt;typename TA &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::less_equal </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -5931,8 +5931,8 @@ template&lt;typename TA &gt; </div>
 
 </div>
 </div>
-<a id="a5cee73ced0a40ed261dc3beec9f8247c"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a5cee73ced0a40ed261dc3beec9f8247c">&#9670;&nbsp;</a></span>less_equal() <span class="overload">[4/6]</span></h2>
+<a id="aec0ac319177760ff01be833bae8b72bf"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#aec0ac319177760ff01be833bae8b72bf">&#9670;&nbsp;</a></span>less_equal() <span class="overload">[4/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -5943,13 +5943,13 @@ template&lt;typename TA &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::less_equal </td>
           <td>(</td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -5973,8 +5973,8 @@ template&lt;typename TA &gt; </div>
 
 </div>
 </div>
-<a id="a59f1a9bebe7948e2570b8c01386253d4"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a59f1a9bebe7948e2570b8c01386253d4">&#9670;&nbsp;</a></span>less_equal() <span class="overload">[5/6]</span></h2>
+<a id="a5cee73ced0a40ed261dc3beec9f8247c"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a5cee73ced0a40ed261dc3beec9f8247c">&#9670;&nbsp;</a></span>less_equal() <span class="overload">[5/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -5985,13 +5985,13 @@ template&lt;typename TA &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::less_equal </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">double&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -6015,8 +6015,8 @@ template&lt;typename TA &gt; </div>
 
 </div>
 </div>
-<a id="ad4734f467b4107f0da21a510788479c1"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#ad4734f467b4107f0da21a510788479c1">&#9670;&nbsp;</a></span>less_equal() <span class="overload">[6/6]</span></h2>
+<a id="a59f1a9bebe7948e2570b8c01386253d4"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a59f1a9bebe7948e2570b8c01386253d4">&#9670;&nbsp;</a></span>less_equal() <span class="overload">[6/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -6027,13 +6027,13 @@ template&lt;typename TA &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::less_equal </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">double&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -7586,8 +7586,8 @@ template&lt;typename TA &gt; </div>
 
 </div>
 </div>
-<a id="a92b9c69c93190d9057dd6f73ff93797a"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a92b9c69c93190d9057dd6f73ff93797a">&#9670;&nbsp;</a></span>mul() <span class="overload">[4/6]</span></h2>
+<a id="a40c70817dccaa589da0562bc8f179008"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a40c70817dccaa589da0562bc8f179008">&#9670;&nbsp;</a></span>mul() <span class="overload">[4/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -7598,13 +7598,13 @@ template&lt;typename TA &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::mul </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">double&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -7670,8 +7670,8 @@ template&lt;typename TA &gt; </div>
 
 </div>
 </div>
-<a id="a40c70817dccaa589da0562bc8f179008"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a40c70817dccaa589da0562bc8f179008">&#9670;&nbsp;</a></span>mul() <span class="overload">[6/6]</span></h2>
+<a id="a92b9c69c93190d9057dd6f73ff93797a"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a92b9c69c93190d9057dd6f73ff93797a">&#9670;&nbsp;</a></span>mul() <span class="overload">[6/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -7682,13 +7682,13 @@ template&lt;typename TA &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::mul </td>
           <td>(</td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">double&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -8051,25 +8051,22 @@ template&lt;&gt; </div>
 
 </div>
 </div>
-<a id="a242b37bc39f3fc56d29e36f916cc1483"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a242b37bc39f3fc56d29e36f916cc1483">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[1/6]</span></h2>
+<a id="a7579d33e0aac9600dec46264a3f1edb8"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a7579d33e0aac9600dec46264a3f1edb8">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[1/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
-<table class="mlabels">
-  <tr>
-  <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname"><a class="el" href="classtvm_1_1Bool.html">Bool</a> tvm::operator&amp;&amp; </td>
+          <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&amp;&amp; </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">bool&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -8078,17 +8075,23 @@ template&lt;&gt; </div>
           <td></td><td></td>
         </tr>
       </table>
-  </td>
-  <td class="mlabels-right">
-<span class="mlabels"><span class="mlabel">inline</span></span>  </td>
-  </tr>
-</table>
 </div><div class="memdoc">
 
+<p>and </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">a</td><td>left operand </td></tr>
+    <tr><td class="paramname">b</td><td>right operand </td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>The result expression. </dd></dl>
+<dl class="section note"><dt>Note</dt><dd>This operator does eager constant folding. </dd></dl>
+
 </div>
 </div>
-<a id="a313252634ee340fcb374f25699832b5f"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a313252634ee340fcb374f25699832b5f">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[2/6]</span></h2>
+<a id="a242b37bc39f3fc56d29e36f916cc1483"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a242b37bc39f3fc56d29e36f916cc1483">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[2/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -8099,13 +8102,13 @@ template&lt;&gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1Bool.html">Bool</a> tvm::operator&amp;&amp; </td>
           <td>(</td>
-          <td class="paramtype">bool&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
+          <td class="paramtype">bool&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -8123,8 +8126,8 @@ template&lt;&gt; </div>
 
 </div>
 </div>
-<a id="a3d58c54be9c168b77bd3c9b6c3b962d3"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a3d58c54be9c168b77bd3c9b6c3b962d3">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[3/6]</span></h2>
+<a id="a313252634ee340fcb374f25699832b5f"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a313252634ee340fcb374f25699832b5f">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[3/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -8135,7 +8138,7 @@ template&lt;&gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1Bool.html">Bool</a> tvm::operator&amp;&amp; </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
+          <td class="paramtype">bool&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
@@ -8159,22 +8162,25 @@ template&lt;&gt; </div>
 
 </div>
 </div>
-<a id="a7579d33e0aac9600dec46264a3f1edb8"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a7579d33e0aac9600dec46264a3f1edb8">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[4/6]</span></h2>
+<a id="a3d58c54be9c168b77bd3c9b6c3b962d3"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a3d58c54be9c168b77bd3c9b6c3b962d3">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[4/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&amp;&amp; </td>
+          <td class="memname"><a class="el" href="classtvm_1_1Bool.html">Bool</a> tvm::operator&amp;&amp; </td>
           <td>(</td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -8183,23 +8189,17 @@ template&lt;&gt; </div>
           <td></td><td></td>
         </tr>
       </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">inline</span></span>  </td>
+  </tr>
+</table>
 </div><div class="memdoc">
 
-<p>and </p>
-<dl class="params"><dt>Parameters</dt><dd>
-  <table class="params">
-    <tr><td class="paramname">a</td><td>left operand </td></tr>
-    <tr><td class="paramname">b</td><td>right operand </td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns</dt><dd>The result expression. </dd></dl>
-<dl class="section note"><dt>Note</dt><dd>This operator does eager constant folding. </dd></dl>
-
 </div>
 </div>
-<a id="a453fac64e53716a977f867cb9665fde9"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a453fac64e53716a977f867cb9665fde9">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[5/6]</span></h2>
+<a id="a1975d7ce2d2cbaef575fef1198550f43"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a1975d7ce2d2cbaef575fef1198550f43">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[5/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -8210,13 +8210,13 @@ template&lt;&gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&amp;&amp; </td>
           <td>(</td>
-          <td class="paramtype">bool&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">bool&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -8234,8 +8234,8 @@ template&lt;&gt; </div>
 
 </div>
 </div>
-<a id="a1975d7ce2d2cbaef575fef1198550f43"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a1975d7ce2d2cbaef575fef1198550f43">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[6/6]</span></h2>
+<a id="a453fac64e53716a977f867cb9665fde9"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a453fac64e53716a977f867cb9665fde9">&#9670;&nbsp;</a></span>operator &&() <span class="overload">[6/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -8246,13 +8246,13 @@ template&lt;&gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&amp;&amp; </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">bool&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">bool&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -8414,8 +8414,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a1815d8b152819885a5733554f374a9ca"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a1815d8b152819885a5733554f374a9ca">&#9670;&nbsp;</a></span>operator*() <span class="overload">[2/6]</span></h2>
+<a id="aca621e1d2df8562819bc021c1410b741"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#aca621e1d2df8562819bc021c1410b741">&#9670;&nbsp;</a></span>operator*() <span class="overload">[2/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -8426,13 +8426,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator* </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">double&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -8522,8 +8522,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="aca621e1d2df8562819bc021c1410b741"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#aca621e1d2df8562819bc021c1410b741">&#9670;&nbsp;</a></span>operator*() <span class="overload">[5/6]</span></h2>
+<a id="a6823188ec16be854223bbffe349c975d"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a6823188ec16be854223bbffe349c975d">&#9670;&nbsp;</a></span>operator*() <span class="overload">[5/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -8534,13 +8534,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator* </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -8558,8 +8558,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a6823188ec16be854223bbffe349c975d"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a6823188ec16be854223bbffe349c975d">&#9670;&nbsp;</a></span>operator*() <span class="overload">[6/6]</span></h2>
+<a id="a1815d8b152819885a5733554f374a9ca"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a1815d8b152819885a5733554f374a9ca">&#9670;&nbsp;</a></span>operator*() <span class="overload">[6/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -8576,7 +8576,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">double&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -8669,8 +8669,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a2725c044e8067299c3dccbd453ce614f"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a2725c044e8067299c3dccbd453ce614f">&#9670;&nbsp;</a></span>operator+() <span class="overload">[2/6]</span></h2>
+<a id="ad728a6c2c3d21242a4df808aadb722eb"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#ad728a6c2c3d21242a4df808aadb722eb">&#9670;&nbsp;</a></span>operator+() <span class="overload">[2/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -8681,13 +8681,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator+ </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -8813,8 +8813,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="ad728a6c2c3d21242a4df808aadb722eb"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#ad728a6c2c3d21242a4df808aadb722eb">&#9670;&nbsp;</a></span>operator+() <span class="overload">[6/6]</span></h2>
+<a id="a2725c044e8067299c3dccbd453ce614f"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a2725c044e8067299c3dccbd453ce614f">&#9670;&nbsp;</a></span>operator+() <span class="overload">[6/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -8825,13 +8825,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator+ </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -9024,8 +9024,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="af7c46ff33a2727f48b10d7d563f4a746"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#af7c46ff33a2727f48b10d7d563f4a746">&#9670;&nbsp;</a></span>operator-() <span class="overload">[5/7]</span></h2>
+<a id="a028ba217f99b6cb1592a6a56b2bc9ee5"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a028ba217f99b6cb1592a6a56b2bc9ee5">&#9670;&nbsp;</a></span>operator-() <span class="overload">[5/7]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -9042,7 +9042,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">double&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -9060,8 +9060,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a028ba217f99b6cb1592a6a56b2bc9ee5"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a028ba217f99b6cb1592a6a56b2bc9ee5">&#9670;&nbsp;</a></span>operator-() <span class="overload">[6/7]</span></h2>
+<a id="af7c46ff33a2727f48b10d7d563f4a746"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#af7c46ff33a2727f48b10d7d563f4a746">&#9670;&nbsp;</a></span>operator-() <span class="overload">[6/7]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -9078,7 +9078,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">double&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -9322,8 +9322,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a4c5092e248ab7daa5de5c22717670d8e"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a4c5092e248ab7daa5de5c22717670d8e">&#9670;&nbsp;</a></span>operator<() <span class="overload">[2/6]</span></h2>
+<a id="a46877235265ab97544ec2e561f521b0f"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a46877235265ab97544ec2e561f521b0f">&#9670;&nbsp;</a></span>operator<() <span class="overload">[2/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -9334,13 +9334,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&lt; </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -9358,8 +9358,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a46877235265ab97544ec2e561f521b0f"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a46877235265ab97544ec2e561f521b0f">&#9670;&nbsp;</a></span>operator<() <span class="overload">[3/6]</span></h2>
+<a id="a0854363590c38f5479b1da5e70c4f002"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a0854363590c38f5479b1da5e70c4f002">&#9670;&nbsp;</a></span>operator<() <span class="overload">[3/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -9370,13 +9370,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&lt; </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">double&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -9394,8 +9394,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="aa672271dbd566a0e7b9e4c87664bccb4"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#aa672271dbd566a0e7b9e4c87664bccb4">&#9670;&nbsp;</a></span>operator<() <span class="overload">[4/6]</span></h2>
+<a id="a4c5092e248ab7daa5de5c22717670d8e"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a4c5092e248ab7daa5de5c22717670d8e">&#9670;&nbsp;</a></span>operator<() <span class="overload">[4/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -9412,7 +9412,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -9430,8 +9430,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a0854363590c38f5479b1da5e70c4f002"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a0854363590c38f5479b1da5e70c4f002">&#9670;&nbsp;</a></span>operator<() <span class="overload">[5/6]</span></h2>
+<a id="aa672271dbd566a0e7b9e4c87664bccb4"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#aa672271dbd566a0e7b9e4c87664bccb4">&#9670;&nbsp;</a></span>operator<() <span class="overload">[5/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -9448,7 +9448,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">double&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -9688,8 +9688,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a6eea8276bcc178425bc14f3d878970ff"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a6eea8276bcc178425bc14f3d878970ff">&#9670;&nbsp;</a></span>operator<=() <span class="overload">[3/6]</span></h2>
+<a id="a872f50bd7175eccf440865311aa75232"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a872f50bd7175eccf440865311aa75232">&#9670;&nbsp;</a></span>operator<=() <span class="overload">[3/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -9700,13 +9700,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&lt;= </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">double&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -9724,8 +9724,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a872f50bd7175eccf440865311aa75232"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a872f50bd7175eccf440865311aa75232">&#9670;&nbsp;</a></span>operator<=() <span class="overload">[4/6]</span></h2>
+<a id="a06d97bd5ee2c12e8547be0cc42f6b300"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a06d97bd5ee2c12e8547be0cc42f6b300">&#9670;&nbsp;</a></span>operator<=() <span class="overload">[4/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -9736,13 +9736,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&lt;= </td>
           <td>(</td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -9796,8 +9796,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a06d97bd5ee2c12e8547be0cc42f6b300"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a06d97bd5ee2c12e8547be0cc42f6b300">&#9670;&nbsp;</a></span>operator<=() <span class="overload">[6/6]</span></h2>
+<a id="a6eea8276bcc178425bc14f3d878970ff"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a6eea8276bcc178425bc14f3d878970ff">&#9670;&nbsp;</a></span>operator<=() <span class="overload">[6/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -9814,7 +9814,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">double&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10018,8 +10018,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a6aeb6ed068c5de8ab908ff234337aeeb"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a6aeb6ed068c5de8ab908ff234337aeeb">&#9670;&nbsp;</a></span>operator>() <span class="overload">[2/6]</span></h2>
+<a id="a9cea8f3789d8f3dc78acae43e9a6aad6"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a9cea8f3789d8f3dc78acae43e9a6aad6">&#9670;&nbsp;</a></span>operator>() <span class="overload">[2/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10030,13 +10030,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&gt; </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10054,8 +10054,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a9cea8f3789d8f3dc78acae43e9a6aad6"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a9cea8f3789d8f3dc78acae43e9a6aad6">&#9670;&nbsp;</a></span>operator>() <span class="overload">[3/6]</span></h2>
+<a id="a6d0ad14c882c11311836138a2c164cf3"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a6d0ad14c882c11311836138a2c164cf3">&#9670;&nbsp;</a></span>operator>() <span class="overload">[3/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10066,13 +10066,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&gt; </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10090,8 +10090,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a6d0ad14c882c11311836138a2c164cf3"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a6d0ad14c882c11311836138a2c164cf3">&#9670;&nbsp;</a></span>operator>() <span class="overload">[4/6]</span></h2>
+<a id="a7e2181bca182f90533ec35537714d09d"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a7e2181bca182f90533ec35537714d09d">&#9670;&nbsp;</a></span>operator>() <span class="overload">[4/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10102,13 +10102,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&gt; </td>
           <td>(</td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">double&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10126,8 +10126,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a7e2181bca182f90533ec35537714d09d"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a7e2181bca182f90533ec35537714d09d">&#9670;&nbsp;</a></span>operator>() <span class="overload">[5/6]</span></h2>
+<a id="a6aeb6ed068c5de8ab908ff234337aeeb"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a6aeb6ed068c5de8ab908ff234337aeeb">&#9670;&nbsp;</a></span>operator>() <span class="overload">[5/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10138,13 +10138,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&gt; </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">double&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10273,8 +10273,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="ac194836fc11a8ba34e44738da17fd116"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#ac194836fc11a8ba34e44738da17fd116">&#9670;&nbsp;</a></span>operator>=() <span class="overload">[3/6]</span></h2>
+<a id="a7a94a354cd62137652e09fa887a96100"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a7a94a354cd62137652e09fa887a96100">&#9670;&nbsp;</a></span>operator>=() <span class="overload">[3/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10285,13 +10285,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&gt;= </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10309,8 +10309,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a35961a6074b72fae0dfc48ee395e0673"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a35961a6074b72fae0dfc48ee395e0673">&#9670;&nbsp;</a></span>operator>=() <span class="overload">[4/6]</span></h2>
+<a id="ac194836fc11a8ba34e44738da17fd116"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#ac194836fc11a8ba34e44738da17fd116">&#9670;&nbsp;</a></span>operator>=() <span class="overload">[4/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10327,7 +10327,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10345,8 +10345,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a7a94a354cd62137652e09fa887a96100"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a7a94a354cd62137652e09fa887a96100">&#9670;&nbsp;</a></span>operator>=() <span class="overload">[5/6]</span></h2>
+<a id="af7dee311b945dfc5a821a119c1db9ad1"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#af7dee311b945dfc5a821a119c1db9ad1">&#9670;&nbsp;</a></span>operator>=() <span class="overload">[5/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10357,7 +10357,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&gt;= </td>
           <td>(</td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
@@ -10381,8 +10381,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="af7dee311b945dfc5a821a119c1db9ad1"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#af7dee311b945dfc5a821a119c1db9ad1">&#9670;&nbsp;</a></span>operator>=() <span class="overload">[6/6]</span></h2>
+<a id="a35961a6074b72fae0dfc48ee395e0673"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a35961a6074b72fae0dfc48ee395e0673">&#9670;&nbsp;</a></span>operator>=() <span class="overload">[6/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10393,13 +10393,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator&gt;= </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10750,25 +10750,22 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a002710a4652156a57495e10a09b5d002"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a002710a4652156a57495e10a09b5d002">&#9670;&nbsp;</a></span>operator||() <span class="overload">[1/6]</span></h2>
+<a id="ac3bf2ef3556c995846dddcd84e5db8a6"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#ac3bf2ef3556c995846dddcd84e5db8a6">&#9670;&nbsp;</a></span>operator||() <span class="overload">[1/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
-<table class="mlabels">
-  <tr>
-  <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname"><a class="el" href="classtvm_1_1Bool.html">Bool</a> tvm::operator|| </td>
+          <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator|| </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">bool&#160;</td>
+          <td class="paramtype"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10777,17 +10774,23 @@ template&lt;typename TB &gt; </div>
           <td></td><td></td>
         </tr>
       </table>
-  </td>
-  <td class="mlabels-right">
-<span class="mlabels"><span class="mlabel">inline</span></span>  </td>
-  </tr>
-</table>
 </div><div class="memdoc">
 
+<p>or </p>
+<dl class="params"><dt>Parameters</dt><dd>
+  <table class="params">
+    <tr><td class="paramname">a</td><td>left operand </td></tr>
+    <tr><td class="paramname">b</td><td>right operand </td></tr>
+  </table>
+  </dd>
+</dl>
+<dl class="section return"><dt>Returns</dt><dd>The result expression. </dd></dl>
+<dl class="section note"><dt>Note</dt><dd>This operator does eager constant folding. </dd></dl>
+
 </div>
 </div>
-<a id="a4c8c1c1c248859ce0d20f614e18a9524"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a4c8c1c1c248859ce0d20f614e18a9524">&#9670;&nbsp;</a></span>operator||() <span class="overload">[2/6]</span></h2>
+<a id="a002710a4652156a57495e10a09b5d002"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a002710a4652156a57495e10a09b5d002">&#9670;&nbsp;</a></span>operator||() <span class="overload">[2/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10798,13 +10801,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1Bool.html">Bool</a> tvm::operator|| </td>
           <td>(</td>
-          <td class="paramtype">bool&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
+          <td class="paramtype">bool&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10822,8 +10825,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="ae4ef6ceffc5778d734c2ddfc72020d60"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#ae4ef6ceffc5778d734c2ddfc72020d60">&#9670;&nbsp;</a></span>operator||() <span class="overload">[3/6]</span></h2>
+<a id="a4c8c1c1c248859ce0d20f614e18a9524"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a4c8c1c1c248859ce0d20f614e18a9524">&#9670;&nbsp;</a></span>operator||() <span class="overload">[3/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10834,7 +10837,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1Bool.html">Bool</a> tvm::operator|| </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
+          <td class="paramtype">bool&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
@@ -10858,22 +10861,25 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="ac3bf2ef3556c995846dddcd84e5db8a6"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#ac3bf2ef3556c995846dddcd84e5db8a6">&#9670;&nbsp;</a></span>operator||() <span class="overload">[4/6]</span></h2>
+<a id="ae4ef6ceffc5778d734c2ddfc72020d60"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#ae4ef6ceffc5778d734c2ddfc72020d60">&#9670;&nbsp;</a></span>operator||() <span class="overload">[4/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
+<table class="mlabels">
+  <tr>
+  <td class="mlabels-left">
       <table class="memname">
         <tr>
-          <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator|| </td>
+          <td class="memname"><a class="el" href="classtvm_1_1Bool.html">Bool</a> tvm::operator|| </td>
           <td>(</td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a>&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1Bool.html">Bool</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10882,23 +10888,17 @@ template&lt;typename TB &gt; </div>
           <td></td><td></td>
         </tr>
       </table>
+  </td>
+  <td class="mlabels-right">
+<span class="mlabels"><span class="mlabel">inline</span></span>  </td>
+  </tr>
+</table>
 </div><div class="memdoc">
 
-<p>or </p>
-<dl class="params"><dt>Parameters</dt><dd>
-  <table class="params">
-    <tr><td class="paramname">a</td><td>left operand </td></tr>
-    <tr><td class="paramname">b</td><td>right operand </td></tr>
-  </table>
-  </dd>
-</dl>
-<dl class="section return"><dt>Returns</dt><dd>The result expression. </dd></dl>
-<dl class="section note"><dt>Note</dt><dd>This operator does eager constant folding. </dd></dl>
-
 </div>
 </div>
-<a id="a873bb60c71f37cbb743e21797a53ba06"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a873bb60c71f37cbb743e21797a53ba06">&#9670;&nbsp;</a></span>operator||() <span class="overload">[5/6]</span></h2>
+<a id="a1a3f9ad4d0e25eee9c0b3a9c83114bc0"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a1a3f9ad4d0e25eee9c0b3a9c83114bc0">&#9670;&nbsp;</a></span>operator||() <span class="overload">[5/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10909,13 +10909,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator|| </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">bool&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">bool&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -10933,8 +10933,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a1a3f9ad4d0e25eee9c0b3a9c83114bc0"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a1a3f9ad4d0e25eee9c0b3a9c83114bc0">&#9670;&nbsp;</a></span>operator||() <span class="overload">[6/6]</span></h2>
+<a id="a873bb60c71f37cbb743e21797a53ba06"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a873bb60c71f37cbb743e21797a53ba06">&#9670;&nbsp;</a></span>operator||() <span class="overload">[6/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -10945,13 +10945,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::operator|| </td>
           <td>(</td>
-          <td class="paramtype">bool&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">bool&#160;</td>
           <td class="paramname"><em>b</em>&#160;</td>
         </tr>
         <tr>
@@ -11819,8 +11819,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="af2d75a528d344c6cfcf8b726a6abb7cc"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#af2d75a528d344c6cfcf8b726a6abb7cc">&#9670;&nbsp;</a></span>sub() <span class="overload">[2/6]</span></h2>
+<a id="a7470d45dafa0a91b6c62b25cdd61514e"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a7470d45dafa0a91b6c62b25cdd61514e">&#9670;&nbsp;</a></span>sub() <span class="overload">[2/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -11837,7 +11837,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">float&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -11861,8 +11861,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a9d05fc8e6a57888c5fd908152bf6d84a"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a9d05fc8e6a57888c5fd908152bf6d84a">&#9670;&nbsp;</a></span>sub() <span class="overload">[3/6]</span></h2>
+<a id="a9cbbc9769fffddd99dfb4ab3f69dafb3"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a9cbbc9769fffddd99dfb4ab3f69dafb3">&#9670;&nbsp;</a></span>sub() <span class="overload">[3/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -11873,13 +11873,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::sub </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">double&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -11903,8 +11903,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a9cbbc9769fffddd99dfb4ab3f69dafb3"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a9cbbc9769fffddd99dfb4ab3f69dafb3">&#9670;&nbsp;</a></span>sub() <span class="overload">[4/6]</span></h2>
+<a id="af2d75a528d344c6cfcf8b726a6abb7cc"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#af2d75a528d344c6cfcf8b726a6abb7cc">&#9670;&nbsp;</a></span>sub() <span class="overload">[4/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -11915,13 +11915,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::sub </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">float&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -11987,8 +11987,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="a7470d45dafa0a91b6c62b25cdd61514e"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#a7470d45dafa0a91b6c62b25cdd61514e">&#9670;&nbsp;</a></span>sub() <span class="overload">[6/6]</span></h2>
+<a id="a9d05fc8e6a57888c5fd908152bf6d84a"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#a9d05fc8e6a57888c5fd908152bf6d84a">&#9670;&nbsp;</a></span>sub() <span class="overload">[6/6]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -12005,7 +12005,7 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">double&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -12387,8 +12387,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="aa7e06644ed017169c5ce6829445ab754"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#aa7e06644ed017169c5ce6829445ab754">&#9670;&nbsp;</a></span>truncmod() <span class="overload">[2/3]</span></h2>
+<a id="aa388c5d23548438d00d64d0084e82391"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#aa388c5d23548438d00d64d0084e82391">&#9670;&nbsp;</a></span>truncmod() <span class="overload">[2/3]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -12399,13 +12399,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::truncmod </td>
           <td>(</td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
@@ -12429,8 +12429,8 @@ template&lt;typename TB &gt; </div>
 
 </div>
 </div>
-<a id="aa388c5d23548438d00d64d0084e82391"></a>
-<h2 class="memtitle"><span class="permalink"><a href="#aa388c5d23548438d00d64d0084e82391">&#9670;&nbsp;</a></span>truncmod() <span class="overload">[3/3]</span></h2>
+<a id="aa7e06644ed017169c5ce6829445ab754"></a>
+<h2 class="memtitle"><span class="permalink"><a href="#aa7e06644ed017169c5ce6829445ab754">&#9670;&nbsp;</a></span>truncmod() <span class="overload">[3/3]</span></h2>
 
 <div class="memitem">
 <div class="memproto">
@@ -12441,13 +12441,13 @@ template&lt;typename TB &gt; </div>
         <tr>
           <td class="memname"><a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> tvm::truncmod </td>
           <td>(</td>
-          <td class="paramtype">int&#160;</td>
+          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
           <td class="paramname"><em>a</em>, </td>
         </tr>
         <tr>
           <td class="paramkey"></td>
           <td></td>
-          <td class="paramtype">const <a class="el" href="classtvm_1_1PrimExpr.html">PrimExpr</a> &amp;&#160;</td>
+          <td class="paramtype">int&#160;</td>
           <td class="paramname"><em>b</em>, </td>
         </tr>
         <tr>
diff --git a/docs/reference/api/doxygen/nn_2bnn_8h_source.html b/docs/reference/api/doxygen/nn_2bnn_8h_source.html
index 3720a9037..7b56bedb3 100644
--- a/docs/reference/api/doxygen/nn_2bnn_8h_source.html
+++ b/docs/reference/api/doxygen/nn_2bnn_8h_source.html
@@ -66,8 +66,8 @@ $(function() {
 <div class="title">bnn.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="nn_2bnn_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more con [...]
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<a href="nn_2bnn_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more con [...]
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="namespacetvm_1_1te_html"><div class="ttname"><a href="namespacetvm_1_1te.html">tvm::te</a></div><div class="ttdoc">Tensor expression language DSL. </div><div class="ttdef"><b>Definition:</b> extracted_task.h:33</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Var_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Var.html">tvm::tir::Var</a></div><div class="ttdoc">a named variable in TIR </div><div class="ttdef"><b>Definition:</b> var.h:88</div></div>
@@ -77,7 +77,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aa026b914ee05f81b6c20130b8905f257"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aa026b914ee05f81b6c20130b8905f257">tvm::runtime::Array::push_back</a></div><div class="ttdeci">void push_back(const T &amp;item)</div><div class="ttdoc">push a new item to the back of the list </div><div class="ttdef"><b>Definition:</b> array.h:436</div></div>
 <div class="ttc" id="constant__utils_8h_html"><div class="ttname"><a href="constant__utils_8h.html">constant_utils.h</a></div><div class="ttdoc">Utility functions for handling constants in TVM expressions. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_af580cd1bea6e862f41c7fad4c4c7eea3"><div class="ttname"><a href="namespacetvm_1_1topi.html#af580cd1bea6e862f41c7fad4c4c7eea3">tvm::topi::sign</a></div><div class="ttdeci">Tensor sign(const Tensor &amp;x, std::string name=&quot;T_sign&quot;, std::string tag=kElementWise)</div><div class="ttdoc">Returns the sign of the tensor. </div><div class="ttdef"><b>Definition:</b> elemwise.h:211</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="namespacetvm_html_afdad0c0329bd39949ba8d296cfb85d76"><div class="ttname"><a href="namespacetvm.html#afdad0c0329bd39949ba8d296cfb85d76">tvm::sum</a></div><div class="ttdeci">PrimExpr sum(PrimExpr source, Array&lt; tir::IterVar &gt; axis, Array&lt; PrimExpr &gt; init={}, Span span=Span())</div><div class="ttdoc">sum of of source expression over axis </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a0250c4095f19ae8a22ed85bc4ce5a40d"><div class="ttname"><a href="namespacetvm_1_1topi.html#a0250c4095f19ae8a22ed85bc4ce5a40d">tvm::topi::kElementWise</a></div><div class="ttdeci">constexpr auto kElementWise</div><div class="ttdef"><b>Definition:</b> tags.h:32</div></div>
diff --git a/docs/reference/api/doxygen/nn_2dense_8h_source.html b/docs/reference/api/doxygen/nn_2dense_8h_source.html
index 6d9d94c42..8295e6e33 100644
--- a/docs/reference/api/doxygen/nn_2dense_8h_source.html
+++ b/docs/reference/api/doxygen/nn_2dense_8h_source.html
@@ -71,7 +71,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1Var_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Var.html">tvm::tir::Var</a></div><div class="ttdoc">a named variable in TIR </div><div class="ttdef"><b>Definition:</b> var.h:88</div></div>
 <div class="ttc" id="namespacetvm_html_a4bfb789a86d95f6241b50fd26f269c28"><div class="ttname"><a href="namespacetvm.html#a4bfb789a86d95f6241b50fd26f269c28">tvm::cast</a></div><div class="ttdeci">PrimExpr cast(const DataType &amp;t, PrimExpr value, Span span=Span())</div><div class="ttdoc">cast value to type. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a13aaf23f0ab77f1ed4a7d4b7816bf210"><div class="ttname"><a href="namespacetvm_1_1topi.html#a13aaf23f0ab77f1ed4a7d4b7816bf210">tvm::topi::kBroadcast</a></div><div class="ttdeci">constexpr auto kBroadcast</div><div class="ttdef"><b>Definition:</b> tags.h:36</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_1_1nn_html_a34e1a8305acf89ef2f745c8d99bf8e89"><div class="ttname"><a href="namespacetvm_1_1topi_1_1nn.html#a34e1a8305acf89ef2f745c8d99bf8e89">tvm::topi::nn::dense</a></div><div class="ttdeci">tvm::te::Tensor dense(const tvm::te::Tensor &amp;data, const tvm::te::Tensor &amp;weight, const tvm::te::Tensor &amp;bias, const DataType &amp;out_dtype)</div><div class="ttdoc">Creates an operation that calculates data * weight^T + bias. </div><div class="t [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a17d8d5ad92691f9e18e3e0ae8ef69e4f"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a17d8d5ad92691f9e18e3e0ae8ef69e4f">tvm::runtime::ObjectRef::defined</a></div><div class="ttdeci">bool defined() const</div><div class="ttdef"><b>Definition:</b> object.h:544</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html">tvm::runtime::DataType</a></div><div class="ttdoc">Runtime primitive data type. </div><div class="ttdef"><b>Definition:</b> data_type.h:41</div></div>
diff --git a/docs/reference/api/doxygen/nn_2pooling_8h_source.html b/docs/reference/api/doxygen/nn_2pooling_8h_source.html
index cde6a310b..47b4f3636 100644
--- a/docs/reference/api/doxygen/nn_2pooling_8h_source.html
+++ b/docs/reference/api/doxygen/nn_2pooling_8h_source.html
@@ -72,7 +72,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_html_a3b37fa55ea93d6868751a2441996b072"><div class="ttname"><a href="namespacetvm.html#a3b37fa55ea93d6868751a2441996b072">tvm::min_value</a></div><div class="ttdeci">PrimExpr min_value(const DataType &amp;dtype, Span span=Span())</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a4b434e701bc9835e2a7de8f0fadebea5"><div class="ttname"><a href="namespacetvm_1_1topi.html#a4b434e701bc9835e2a7de8f0fadebea5">tvm::topi::MakeArgmaxReducer</a></div><div class="ttdeci">FCommReduce MakeArgmaxReducer(bool select_last_index=false)</div><div class="ttdef"><b>Definition:</b> reduction.h:495</div></div>
 <div class="ttc" id="namespacetvm_html_ada5ad8338d3144221d8f16380e6c4855"><div class="ttname"><a href="namespacetvm.html#ada5ad8338d3144221d8f16380e6c4855">tvm::indexmod</a></div><div class="ttdeci">PrimExpr indexmod(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute the remainder floor(a / b) where a and b are non-negative. </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_ae488679377c78cd5411b7df11c297673"><div class="ttname"><a href="namespacetvm_1_1topi.html#ae488679377c78cd5411b7df11c297673">tvm::topi::min</a></div><div class="ttdeci">Tensor min(const Tensor &amp;data, const Array&lt; Integer &gt; &amp;axis, bool keepdims=false, bool atleast1d=false)</div><div class="ttdoc">Creates an operation that finds the minimum of elements over a given axis. </div><div class="ttdef"><b>Definition:</b> reduction.h:410 [...]
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_1_1nn_html_a3fb74f1c3df6edf17c9a3f1e122f84fe"><div class="ttname"><a href="namespacetvm_1_1topi_1_1nn.html#a3fb74f1c3df6edf17c9a3f1e122f84fe">tvm::topi::nn::adaptive_pool_impl</a></div><div class="ttdeci">Tensor adaptive_pool_impl(const Tensor &amp;x, const Array&lt; PrimExpr &gt; &amp;output_size, PoolType pool_type, const std::vector&lt; int &gt; &amp;axes)</div><div class="ttdoc">Perform adaptive pooling on N dimensional data. </div><div class [...]
@@ -84,11 +84,11 @@ $(function() {
 <div class="ttc" id="reduction_8h_html"><div class="ttname"><a href="reduction_8h.html">reduction.h</a></div><div class="ttdoc">Reduction op constructors. </div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1Analyzer_html_a9b440f852f12ad0a4d8ed5ed97054425"><div class="ttname"><a href="classtvm_1_1arith_1_1Analyzer.html#a9b440f852f12ad0a4d8ed5ed97054425">tvm::arith::Analyzer::Simplify</a></div><div class="ttdeci">PrimExpr Simplify(const PrimExpr &amp;expr, int steps=2)</div><div class="ttdoc">Simplify expr. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_1_1nn_html_a3ffa0974d8cdcd5b8ca7afb3cfbaf53c"><div class="ttname"><a href="namespacetvm_1_1topi_1_1nn.html#a3ffa0974d8cdcd5b8ca7afb3cfbaf53c">tvm::topi::nn::PoolType</a></div><div class="ttdeci">PoolType</div><div class="ttdoc">Pooling type. </div><div class="ttdef"><b>Definition:</b> pooling.h:44</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_acbe8f225faaf34c540194921a7ee6a66"><div class="ttname"><a href="namespacetvm_1_1tir.html#acbe8f225faaf34c540194921a7ee6a66">tvm::tir::as_const_int</a></div><div class="ttdeci">const int64_t * as_const_int(const PrimExpr &amp;x)</div><div class="ttdoc">Get x as constant int expression. </div><div class="ttdef"><b>Definition:</b> op.h:985</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_acbe8f225faaf34c540194921a7ee6a66"><div class="ttname"><a href="namespacetvm_1_1tir.html#acbe8f225faaf34c540194921a7ee6a66">tvm::tir::as_const_int</a></div><div class="ttdeci">const int64_t * as_const_int(const PrimExpr &amp;x)</div><div class="ttdoc">Get x as constant int expression. </div><div class="ttdef"><b>Definition:</b> op.h:790</div></div>
 <div class="ttc" id="namespacetvm_html_a4bfb789a86d95f6241b50fd26f269c28"><div class="ttname"><a href="namespacetvm.html#a4bfb789a86d95f6241b50fd26f269c28">tvm::cast</a></div><div class="ttdeci">PrimExpr cast(const DataType &amp;t, PrimExpr value, Span span=Span())</div><div class="ttdoc">cast value to type. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_ab5db2ee9a8be71931324dac552be24c4"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#ab5db2ee9a8be71931324dac552be24c4">tvm::runtime::Array::Set</a></div><div class="ttdeci">void Set(int64_t i, T value)</div><div class="ttdoc">set i-th element of the array. </div><div class="ttdef"><b>Definition:</b> array.h:567</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aa026b914ee05f81b6c20130b8905f257"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aa026b914ee05f81b6c20130b8905f257">tvm::runtime::Array::push_back</a></div><div class="ttdeci">void push_back(const T &amp;item)</div><div class="ttdoc">push a new item to the back of the list </div><div class="ttdef"><b>Definition:</b> array.h:436</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="namespacetvm_html_a16f9cd9219b505e2cc05c5a7558ac61f"><div class="ttname"><a href="namespacetvm.html#a16f9cd9219b505e2cc05c5a7558ac61f">tvm::div</a></div><div class="ttdeci">PrimExpr div(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute division in C semantics. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aed6387e67d18b9d5ad18f510fd600a25"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aed6387e67d18b9d5ad18f510fd600a25">tvm::runtime::Array::size</a></div><div class="ttdeci">size_t size() const</div><div class="ttdef"><b>Definition:</b> array.h:399</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_1_1nn_html_a3ffa0974d8cdcd5b8ca7afb3cfbaf53ca9780bd19bf6706355258ca09ddeab335"><div class="ttname"><a href="namespacetvm_1_1topi_1_1nn.html#a3ffa0974d8cdcd5b8ca7afb3cfbaf53ca9780bd19bf6706355258ca09ddeab335">tvm::topi::nn::kAvgPool</a></div><div class="ttdef"><b>Definition:</b> pooling.h:45</div></div>
diff --git a/docs/reference/api/doxygen/nn_2softmax_8h_source.html b/docs/reference/api/doxygen/nn_2softmax_8h_source.html
index 04969ed96..e17fc2f84 100644
--- a/docs/reference/api/doxygen/nn_2softmax_8h_source.html
+++ b/docs/reference/api/doxygen/nn_2softmax_8h_source.html
@@ -75,7 +75,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_html_ad58b3ba5122294bd1eb045b5792c3976"><div class="ttname"><a href="namespacetvm_1_1topi.html#ad58b3ba5122294bd1eb045b5792c3976">tvm::topi::MakeReduceTargetShape</a></div><div class="ttdeci">Array&lt; PrimExpr &gt; MakeReduceTargetShape(const std::vector&lt; int &gt; &amp;real_axis, const Tensor &amp;data, bool keepdims, bool atleast1d)</div><div class="ttdoc">Calculate the target shape for a reduce op. </div><div class="ttdef"><b>Definition:</b [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aa026b914ee05f81b6c20130b8905f257"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aa026b914ee05f81b6c20130b8905f257">tvm::runtime::Array::push_back</a></div><div class="ttdeci">void push_back(const T &amp;item)</div><div class="ttdoc">push a new item to the back of the list </div><div class="ttdef"><b>Definition:</b> array.h:436</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_1_1nn_html_ac0e20b6b30ec8296c1f037866d3bf772"><div class="ttname"><a href="namespacetvm_1_1topi_1_1nn.html#ac0e20b6b30ec8296c1f037866d3bf772">tvm::topi::nn::log_softmax</a></div><div class="ttdeci">Tensor log_softmax(const Tensor &amp;x, std::string name=&quot;tensor&quot;, std::string tag=&quot;log_softmax_output&quot;)</div><div class="ttdoc">Log softmax activation. </div><div class="ttdef"><b>Definition:</b> softmax.h:126</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a38fe82b0db9eab041324da16e532baff"><div class="ttname"><a href="namespacetvm_1_1topi.html#a38fe82b0db9eab041324da16e532baff">tvm::topi::MaxOp</a></div><div class="ttdeci">PrimExpr MaxOp(PrimExpr source, Array&lt; IterVar &gt; axis, Array&lt; PrimExpr &gt; init={}, Span span=Span())</div><div class="ttdoc">Wrap tvm::max to ensure we get the correct overload. </div><div class="ttdef"><b>Definition:</b> reduction.h:302</div></div>
 <div class="ttc" id="namespacetvm_html_afdad0c0329bd39949ba8d296cfb85d76"><div class="ttname"><a href="namespacetvm.html#afdad0c0329bd39949ba8d296cfb85d76">tvm::sum</a></div><div class="ttdeci">PrimExpr sum(PrimExpr source, Array&lt; tir::IterVar &gt; axis, Array&lt; PrimExpr &gt; init={}, Span span=Span())</div><div class="ttdoc">sum of of source expression over axis </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
@@ -87,10 +87,10 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1Map_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Map.html">tvm::runtime::Map</a></div><div class="ttdoc">Map container of NodeRef-&gt;NodeRef in DSL graph. Map implements copy on write semantics, which means map is mutable but copy will happen when array is referenced in more than two places. </div><div class="ttdef"><b>Definition:</b> map.h:1268</div></div>
 <div class="ttc" id="tags_8h_html"><div class="ttname"><a href="tags_8h.html">tags.h</a></div><div class="ttdoc">External function interface to rocBLAS libraries. </div></div>
 <div class="ttc" id="namespacetvm_1_1te_html_afe4f57aeb3dd5ae9c0b58135e14d67ca"><div class="ttname"><a href="namespacetvm_1_1te.html#afe4f57aeb3dd5ae9c0b58135e14d67ca">tvm::te::compute</a></div><div class="ttdeci">Tensor compute(Array&lt; PrimExpr &gt; shape, FCompute fcompute, std::string name=&quot;tensor&quot;, std::string tag=&quot;&quot;, Map&lt; String, ObjectRef &gt; attrs={})</div><div class="ttdoc">Construct a new tensor by computing over shape, using the computation rule: resul [...]
-<div class="ttc" id="namespacetvm_html_a82be70bd7794abca32473604cbb09569"><div class="ttname"><a href="namespacetvm.html#a82be70bd7794abca32473604cbb09569">tvm::exp</a></div><div class="ttdeci">PrimExpr exp(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:888</div></div>
+<div class="ttc" id="namespacetvm_html_a82be70bd7794abca32473604cbb09569"><div class="ttname"><a href="namespacetvm.html#a82be70bd7794abca32473604cbb09569">tvm::exp</a></div><div class="ttdeci">PrimExpr exp(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:693</div></div>
 <div class="ttc" id="classtvm_1_1PrimExpr_html"><div class="ttname"><a href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a></div><div class="ttdoc">Reference to PrimExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:112</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Map_html_ad3a78d88e3a9292d11ce04ff2dfe0702"><div class="ttname"><a href="classtvm_1_1runtime_1_1Map.html#ad3a78d88e3a9292d11ce04ff2dfe0702">tvm::runtime::Map::Set</a></div><div class="ttdeci">void Set(const K &amp;key, const V &amp;value)</div><div class="ttdoc">set the Map. </div><div class="ttdef"><b>Definition:</b> map.h:1371</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/op__strategy_8h_source.html b/docs/reference/api/doxygen/op__strategy_8h_source.html
index 3fbd166d9..05839231a 100644
--- a/docs/reference/api/doxygen/op__strategy_8h_source.html
+++ b/docs/reference/api/doxygen/op__strategy_8h_source.html
@@ -100,7 +100,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1relay_1_1OpImplementation_html"><div class="ttname"><a href="classtvm_1_1relay_1_1OpImplementation.html">tvm::relay::OpImplementation</a></div><div class="ttdoc">Operator implementation class. </div><div class="ttdef"><b>Definition:</b> op_strategy.h:65</div></div>
 <div class="ttc" id="classtvm_1_1Type_html"><div class="ttname"><a href="classtvm_1_1Type.html">tvm::Type</a></div><div class="ttdoc">Managed reference to TypeNode. </div><div class="ttdef"><b>Definition:</b> type.h:93</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1OpStrategyNode_html_a9e3048aafa0fe847cb160fd8825f5220"><div class="ttname"><a href="classtvm_1_1relay_1_1OpStrategyNode.html#a9e3048aafa0fe847cb160fd8825f5220">tvm::relay::OpStrategyNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> op_strategy.h:135</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:145</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:359</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1OpImplementationNode_html_af7afc9d22e58ead1fc342808ed0c4304"><div class="ttname"><a href="classtvm_1_1relay_1_1OpImplementationNode.html#af7afc9d22e58ead1fc342808ed0c4304">tvm::relay::OpImplementationNode::plevel</a></div><div class="ttdeci">int plevel</div><div class="ttdoc">Priority level. </div><div class="ttdef"><b>Definition:</b> op_strategy.h:51</div></div>
 <div class="ttc" id="te_2schedule_8h_html"><div class="ttname"><a href="te_2schedule_8h.html">schedule.h</a></div><div class="ttdoc">Define a schedule. </div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1OpSpecializationNode_html_a8d5ffe97532ff7b663d2b7da09bd9042"><div class="ttname"><a href="classtvm_1_1relay_1_1OpSpecializationNode.html#a8d5ffe97532ff7b663d2b7da09bd9042">tvm::relay::OpSpecializationNode::implementations</a></div><div class="ttdeci">Array&lt; OpImplementation &gt; implementations</div><div class="ttdoc">List of implementations. </div><div class="ttdef"><b>Definition:</b> op_strategy.h:95</div></div>
diff --git a/docs/reference/api/doxygen/operation_8h_source.html b/docs/reference/api/doxygen/operation_8h_source.html
index 3096e64cc..decd4f704 100644
--- a/docs/reference/api/doxygen/operation_8h_source.html
+++ b/docs/reference/api/doxygen/operation_8h_source.html
@@ -104,7 +104,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1te_1_1HybridOpNode_html_a45104e4a1d31714b9eac76510d33fda5"><div class="ttname"><a href="classtvm_1_1te_1_1HybridOpNode.html#a45104e4a1d31714b9eac76510d33fda5">tvm::te::HybridOpNode::inputs</a></div><div class="ttdeci">Array&lt; Tensor &gt; inputs</div><div class="ttdoc">The input tensors. </div><div class="ttdef"><b>Definition:</b> operation.h:475</div></div>
 <div class="ttc" id="classtvm_1_1AttrVisitor_html"><div class="ttname"><a href="classtvm_1_1AttrVisitor.html">tvm::AttrVisitor</a></div><div class="ttdoc">Visitor class to get the attributes of an AST/IR node. The content is going to be called for each fie...</div><div class="ttdef"><b>Definition:</b> reflection.h:52</div></div>
 <div class="ttc" id="tensor_8h_html"><div class="ttname"><a href="tensor_8h.html">tensor.h</a></div><div class="ttdoc">Dataflow tensor object. </div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1HybridOpNode_html_a7d3899d1fae8ae3877e3361704b4d35e"><div class="ttname"><a href="classtvm_1_1te_1_1HybridOpNode.html#a7d3899d1fae8ae3877e3361704b4d35e">tvm::te::HybridOpNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> operation.h:507</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1TensorIntrin_html"><div class="ttname"><a href="classtvm_1_1te_1_1TensorIntrin.html">tvm::te::TensorIntrin</a></div><div class="ttdoc">Managed reference to TensorIntrinNode. </div><div class="ttdef"><b>Definition:</b> tensor_intrin.h:93</div></div>
 <div class="ttc" id="classtvm_1_1te_1_1TensorComputeOpNode_html_a6448f522f6326aaed2d4137376c9fc78"><div class="ttname"><a href="classtvm_1_1te_1_1TensorComputeOpNode.html#a6448f522f6326aaed2d4137376c9fc78">tvm::te::TensorComputeOpNode::TensorComputeOpNode</a></div><div class="ttdeci">TensorComputeOpNode()</div><div class="ttdoc">constructor </div><div class="ttdef"><b>Definition:</b> operation.h:287</div></div>
diff --git a/docs/reference/api/doxygen/reduction_8h_source.html b/docs/reference/api/doxygen/reduction_8h_source.html
index 2dada07d9..b653450a0 100644
--- a/docs/reference/api/doxygen/reduction_8h_source.html
+++ b/docs/reference/api/doxygen/reduction_8h_source.html
@@ -78,7 +78,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1Reduce_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Reduce.html">tvm::tir::Reduce</a></div><div class="ttdoc">Managed reference to ReduceNode. </div><div class="ttdef"><b>Definition:</b> expr.h:1121</div></div>
 <div class="ttc" id="namespacetvm_html_a3b37fa55ea93d6868751a2441996b072"><div class="ttname"><a href="namespacetvm.html#a3b37fa55ea93d6868751a2441996b072">tvm::min_value</a></div><div class="ttdeci">PrimExpr min_value(const DataType &amp;dtype, Span span=Span())</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a4b434e701bc9835e2a7de8f0fadebea5"><div class="ttname"><a href="namespacetvm_1_1topi.html#a4b434e701bc9835e2a7de8f0fadebea5">tvm::topi::MakeArgmaxReducer</a></div><div class="ttdeci">FCommReduce MakeArgmaxReducer(bool select_last_index=false)</div><div class="ttdef"><b>Definition:</b> reduction.h:495</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_ae488679377c78cd5411b7df11c297673"><div class="ttname"><a href="namespacetvm_1_1topi.html#ae488679377c78cd5411b7df11c297673">tvm::topi::min</a></div><div class="ttdeci">Tensor min(const Tensor &amp;data, const Array&lt; Integer &gt; &amp;axis, bool keepdims=false, bool atleast1d=false)</div><div class="ttdoc">Creates an operation that finds the minimum of elements over a given axis. </div><div class="ttdef"><b>Definition:</b> reduction.h:410 [...]
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="namespacetvm_1_1te_html"><div class="ttname"><a href="namespacetvm_1_1te.html">tvm::te</a></div><div class="ttdoc">Tensor expression language DSL. </div><div class="ttdef"><b>Definition:</b> extracted_task.h:33</div></div>
@@ -88,8 +88,8 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_html_a988ca437c8085900c96ff750521af96f"><div class="ttname"><a href="namespacetvm_1_1topi.html#a988ca437c8085900c96ff750521af96f">tvm::topi::MakeArgminReducer</a></div><div class="ttdeci">FCommReduce MakeArgminReducer(bool select_last_index=false)</div><div class="ttdef"><b>Definition:</b> reduction.h:434</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_abee7c35e8c15e2e61afe35852dfcb252"><div class="ttname"><a href="namespacetvm_1_1topi.html#abee7c35e8c15e2e61afe35852dfcb252">tvm::topi::sum</a></div><div class="ttdeci">Tensor sum(const Tensor &amp;data, const Array&lt; Integer &gt; &amp;axis, bool keepdims=false, bool atleast1d=false)</div><div class="ttdoc">Creates an operation that sums array elements over a given axis. </div><div class="ttdef"><b>Definition:</b> reduction.h:326</div></div>
 <div class="ttc" id="constant__utils_8h_html"><div class="ttname"><a href="constant__utils_8h.html">constant_utils.h</a></div><div class="ttdoc">Utility functions for handling constants in TVM expressions. </div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a8dd84303a9864b5b366835fa628a7824"><div class="ttname"><a href="namespacetvm_1_1tir.html#a8dd84303a9864b5b366835fa628a7824">tvm::tir::const_true</a></div><div class="ttdeci">PrimExpr const_true(int lanes=1, Span span=Span())</div><div class="ttdoc">Make a constant true expression. </div><div class="ttdef"><b>Definition:</b> op.h:967</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a8dd84303a9864b5b366835fa628a7824"><div class="ttname"><a href="namespacetvm_1_1tir.html#a8dd84303a9864b5b366835fa628a7824">tvm::tir::const_true</a></div><div class="ttdeci">PrimExpr const_true(int lanes=1, Span span=Span())</div><div class="ttdoc">Make a constant true expression. </div><div class="ttdef"><b>Definition:</b> op.h:772</div></div>
 <div class="ttc" id="classtvm_1_1Span_html"><div class="ttname"><a href="classtvm_1_1Span.html">tvm::Span</a></div><div class="ttdef"><b>Definition:</b> span.h:115</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aed6387e67d18b9d5ad18f510fd600a25"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aed6387e67d18b9d5ad18f510fd600a25">tvm::runtime::Array::size</a></div><div class="ttdeci">size_t size() const</div><div class="ttdef"><b>Definition:</b> array.h:399</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a38fe82b0db9eab041324da16e532baff"><div class="ttname"><a href="namespacetvm_1_1topi.html#a38fe82b0db9eab041324da16e532baff">tvm::topi::MaxOp</a></div><div class="ttdeci">PrimExpr MaxOp(PrimExpr source, Array&lt; IterVar &gt; axis, Array&lt; PrimExpr &gt; init={}, Span span=Span())</div><div class="ttdoc">Wrap tvm::max to ensure we get the correct overload. </div><div class="ttdef"><b>Definition:</b> reduction.h:302</div></div>
diff --git a/docs/reference/api/doxygen/relay_2adt_8h_source.html b/docs/reference/api/doxygen/relay_2adt_8h_source.html
index 24548573e..b31d76e64 100644
--- a/docs/reference/api/doxygen/relay_2adt_8h_source.html
+++ b/docs/reference/api/doxygen/relay_2adt_8h_source.html
@@ -124,7 +124,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1TypeDataNode_html"><div class="ttname"><a href="classtvm_1_1TypeDataNode.html">tvm::TypeDataNode</a></div><div class="ttdoc">TypeData container node. </div><div class="ttdef"><b>Definition:</b> adt.h:102</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_ac261cdb80487fb29ac42b28678f8cbef"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#ac261cdb80487fb29ac42b28678f8cbef">tvm::runtime::ObjectRef::data_</a></div><div class="ttdeci">ObjectPtr&lt; Object &gt; data_</div><div class="ttdoc">Internal pointer that backs the reference. </div><div class="ttdef"><b>Definition:</b> object.h:574</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ClauseNode_html_a0e8b70367fd4938f0e01f28c9226c256"><div class="ttname"><a href="classtvm_1_1relay_1_1ClauseNode.html#a0e8b70367fd4938f0e01f28c9226c256">tvm::relay::ClauseNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> adt.h:242</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1PatternVarNode_html_a708bf5e7f6d95a62eab6838c17563f69"><div class="ttname"><a href="classtvm_1_1relay_1_1PatternVarNode.html#a708bf5e7f6d95a62eab6838c17563f69">tvm::relay::PatternVarNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> adt.h:121</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1Pattern_html"><div class="ttname"><a href="classtvm_1_1relay_1_1Pattern.html">tvm::relay::Pattern</a></div><div class="ttdoc">Pattern is the base type for an ADT match pattern in Relay. </div><div class="ttdef"><b>Definition:</b> adt.h:63</div></div>
@@ -154,7 +154,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1relay_1_1ClauseNode_html_ac73f65e5e9187d74aaa0a47c45cb6eb8"><div class="ttname"><a href="classtvm_1_1relay_1_1ClauseNode.html#ac73f65e5e9187d74aaa0a47c45cb6eb8">tvm::relay::ClauseNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> adt.h:233</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a6e725a1cb4c83346e261eac7dc7292a8"><div class="ttname"><a href="namespacetvm_1_1relay.html#a6e725a1cb4c83346e261eac7dc7292a8">tvm::relay::TypeData</a></div><div class="ttdeci">tvm::TypeData TypeData</div><div class="ttdef"><b>Definition:</b> adt.h:43</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1Clause_html"><div class="ttname"><a href="classtvm_1_1relay_1_1Clause.html">tvm::relay::Clause</a></div><div class="ttdef"><b>Definition:</b> adt.h:253</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:145</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:359</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1PatternWildcard_html_aef430b36a4be504969829f800e126245"><div class="ttname"><a href="classtvm_1_1relay_1_1PatternWildcard.html#aef430b36a4be504969829f800e126245">tvm::relay::PatternWildcard::operator=</a></div><div class="ttdeci">PatternWildcard &amp; operator=(const PatternWildcard &amp;other)</div><div class="ttdef"><b>Definition:</b> adt.h:96</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1PatternWildcardNode_html_a0734c7eeca5c4f86d6fcd021ed1cafa2"><div class="ttname"><a href="classtvm_1_1relay_1_1PatternWildcardNode.html#a0734c7eeca5c4f86d6fcd021ed1cafa2">tvm::relay::PatternWildcardNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> adt.h:80</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1PatternTupleNode_html"><div class="ttname"><a href="classtvm_1_1relay_1_1PatternTupleNode.html">tvm::relay::PatternTupleNode</a></div><div class="ttdoc">PatternVar container node. </div><div class="ttdef"><b>Definition:</b> adt.h:191</div></div>
diff --git a/docs/reference/api/doxygen/relay_2attrs_2transform_8h_source.html b/docs/reference/api/doxygen/relay_2attrs_2transform_8h_source.html
index ee0d79202..4047d7fa7 100644
--- a/docs/reference/api/doxygen/relay_2attrs_2transform_8h_source.html
+++ b/docs/reference/api/doxygen/relay_2attrs_2transform_8h_source.html
@@ -78,7 +78,7 @@ $(function() {
 <div class="ttc" id="structtvm_1_1relay_1_1ReshapeLikeAttrs_html_ab858f0ecef7ac56b1f9d69b57eb6f3c8"><div class="ttname"><a href="structtvm_1_1relay_1_1ReshapeLikeAttrs.html#ab858f0ecef7ac56b1f9d69b57eb6f3c8">tvm::relay::ReshapeLikeAttrs::lhs_begin</a></div><div class="ttdeci">int lhs_begin</div><div class="ttdef"><b>Definition:</b> transform.h:133</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1MatrixSetDiagAttrs_html_a4ff9895cebf85396e817f40d9209cff1"><div class="ttname"><a href="structtvm_1_1relay_1_1MatrixSetDiagAttrs.html#a4ff9895cebf85396e817f40d9209cff1">tvm::relay::MatrixSetDiagAttrs::TVM_DECLARE_ATTRS</a></div><div class="ttdeci">TVM_DECLARE_ATTRS(MatrixSetDiagAttrs, &quot;relay.attrs.MatrixSetDiagAttrs&quot;)</div><div class="ttdef"><b>Definition:</b> transform.h:507</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1CastAttrs_html"><div class="ttname"><a href="structtvm_1_1relay_1_1CastAttrs.html">tvm::relay::CastAttrs</a></div><div class="ttdoc">data type cast </div><div class="ttdef"><b>Definition:</b> transform.h:61</div></div>
-<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:369</div></div>
+<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:583</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1FixedPointMultiplyAttrs_html"><div class="ttname"><a href="structtvm_1_1relay_1_1FixedPointMultiplyAttrs.html">tvm::relay::FixedPointMultiplyAttrs</a></div><div class="ttdoc">Attributes for FixedPointMultiply operator. </div><div class="ttdef"><b>Definition:</b> transform.h:396</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1ExpandDimsAttrs_html_a289f19d5f47a1242a04a909c73ca34ac"><div class="ttname"><a href="structtvm_1_1relay_1_1ExpandDimsAttrs.html#a289f19d5f47a1242a04a909c73ca34ac">tvm::relay::ExpandDimsAttrs::axis</a></div><div class="ttdeci">int axis</div><div class="ttdef"><b>Definition:</b> transform.h:71</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1ReshapeAttrs_html_a53162b9a7f6232a8d599f58ffafce930"><div class="ttname"><a href="structtvm_1_1relay_1_1ReshapeAttrs.html#a53162b9a7f6232a8d599f58ffafce930">tvm::relay::ReshapeAttrs::allowzero</a></div><div class="ttdeci">bool allowzero</div><div class="ttdef"><b>Definition:</b> transform.h:122</div></div>
@@ -163,7 +163,7 @@ $(function() {
 <div class="ttc" id="structtvm_1_1relay_1_1ScatterNDAttrs_html_ab13eeaa700fe7e41666ac04179e0fd62"><div class="ttname"><a href="structtvm_1_1relay_1_1ScatterNDAttrs.html#ab13eeaa700fe7e41666ac04179e0fd62">tvm::relay::ScatterNDAttrs::mode</a></div><div class="ttdeci">String mode</div><div class="ttdef"><b>Definition:</b> transform.h:168</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1InitOpAttrs_html_a549d7ca42a8ac4f7db2b96f276de6266"><div class="ttname"><a href="structtvm_1_1relay_1_1InitOpAttrs.html#a549d7ca42a8ac4f7db2b96f276de6266">tvm::relay::InitOpAttrs::TVM_DECLARE_ATTRS</a></div><div class="ttdeci">TVM_DECLARE_ATTRS(InitOpAttrs, &quot;relay.attrs.InitOpAttrs&quot;)</div><div class="ttdef"><b>Definition:</b> transform.h:225</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1ScatterAddAttrs_html_aa266a70026db0feb88f57a08fb4d3303"><div class="ttname"><a href="structtvm_1_1relay_1_1ScatterAddAttrs.html#aa266a70026db0feb88f57a08fb4d3303">tvm::relay::ScatterAddAttrs::TVM_DECLARE_ATTRS</a></div><div class="ttdeci">TVM_DECLARE_ATTRS(ScatterAddAttrs, &quot;relay.attrs.ScatterAddAttrs&quot;)</div><div class="ttdef"><b>Definition:</b> transform.h:162</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1MetaScheduleLayoutTransformAttrs_html"><div class="ttname"><a href="structtvm_1_1relay_1_1MetaScheduleLayoutTransformAttrs.html">tvm::relay::MetaScheduleLayoutTransformAttrs</a></div><div class="ttdoc">Attributes for MetaScheduleLayoutTransform operator. </div><div class="ttdef"><b>Definition:</b> transform.h:434</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1ArangeAttrs_html_a1eadf1f3964ca83dade8edeae7d6d7cf"><div class="ttname"><a href="structtvm_1_1relay_1_1ArangeAttrs.html#a1eadf1f3964ca83dade8edeae7d6d7cf">tvm::relay::ArangeAttrs::stop</a></div><div class="ttdeci">Expr stop</div><div class="ttdef"><b>Definition:</b> transform.h:234</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1SlidingWindowAttrs_html_a74e2768c0ff2825a95c17c2a9a034254"><div class="ttname"><a href="structtvm_1_1relay_1_1SlidingWindowAttrs.html#a74e2768c0ff2825a95c17c2a9a034254">tvm::relay::SlidingWindowAttrs::strides</a></div><div class="ttdeci">Array&lt; Integer &gt; strides</div><div class="ttdef"><b>Definition:</b> transform.h:41</div></div>
@@ -242,7 +242,7 @@ $(function() {
 <div class="ttc" id="structtvm_1_1relay_1_1MatrixSetDiagAttrs_html_a181f59c653c46220afdd98605b6d6e34"><div class="ttname"><a href="structtvm_1_1relay_1_1MatrixSetDiagAttrs.html#a181f59c653c46220afdd98605b6d6e34">tvm::relay::MatrixSetDiagAttrs::k2</a></div><div class="ttdeci">int k2</div><div class="ttdef"><b>Definition:</b> transform.h:503</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1ConcatenateAttrs_html"><div class="ttname"><a href="structtvm_1_1relay_1_1ConcatenateAttrs.html">tvm::relay::ConcatenateAttrs</a></div><div class="ttdoc">Attributes used in concatenate operators. </div><div class="ttdef"><b>Definition:</b> transform.h:100</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1ReverseSequenceAttrs_html_a922eb49dd420d3a148f206efca0e0b48"><div class="ttname"><a href="structtvm_1_1relay_1_1ReverseSequenceAttrs.html#a922eb49dd420d3a148f206efca0e0b48">tvm::relay::ReverseSequenceAttrs::batch_axis</a></div><div class="ttdeci">Integer batch_axis</div><div class="ttdef"><b>Definition:</b> transform.h:303</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1TakeAttrs_html_a5d4f821c5541cb9deb71d835b144cb22"><div class="ttname"><a href="structtvm_1_1relay_1_1TakeAttrs.html#a5d4f821c5541cb9deb71d835b144cb22">tvm::relay::TakeAttrs::axis</a></div><div class="ttdeci">Integer axis</div><div class="ttdef"><b>Definition:</b> transform.h:202</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
diff --git a/docs/reference/api/doxygen/relay_2expr_8h_source.html b/docs/reference/api/doxygen/relay_2expr_8h_source.html
index efc50253c..ad9ee71f6 100644
--- a/docs/reference/api/doxygen/relay_2expr_8h_source.html
+++ b/docs/reference/api/doxygen/relay_2expr_8h_source.html
@@ -139,7 +139,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1relay_1_1RefCreate_html"><div class="ttname"><a href="classtvm_1_1relay_1_1RefCreate.html">tvm::relay::RefCreate</a></div><div class="ttdef"><b>Definition:</b> expr.h:635</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a81ac7c3d0824529fddce7849c9c66289"><div class="ttname"><a href="namespacetvm_1_1relay.html#a81ac7c3d0824529fddce7849c9c66289">tvm::relay::GlobalVar</a></div><div class="ttdeci">tvm::GlobalVar GlobalVar</div><div class="ttdef"><b>Definition:</b> expr.h:58</div></div>
 <div class="ttc" id="classtvm_1_1SHashReducer_1_1Handler_html_a8f9a489881fc55552f13a58313a863cf"><div class="ttname"><a href="classtvm_1_1SHashReducer_1_1Handler.html#a8f9a489881fc55552f13a58313a863cf">tvm::SHashReducer::Handler::MarkGraphNode</a></div><div class="ttdeci">virtual void MarkGraphNode()=0</div><div class="ttdoc">Mark current comparison as graph node in hashing. Graph node hash will depends on the graph structure...</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html_ae30ca49a8b84288fbc21d5f312f02929"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ae30ca49a8b84288fbc21d5f312f02929">tvm::RelayExprNode::checked_type_</a></div><div class="ttdeci">Type checked_type_</div><div class="ttdoc">Stores the result of type inference(type checking). </div><div class="ttdef"><b>Definition:</b> expr.h:153</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html_ae30ca49a8b84288fbc21d5f312f02929"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ae30ca49a8b84288fbc21d5f312f02929">tvm::RelayExprNode::checked_type_</a></div><div class="ttdeci">Type checked_type_</div><div class="ttdoc">Stores the result of type inference(type checking). </div><div class="ttdef"><b>Definition:</b> expr.h:367</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1RefWriteNode_html_a0be5c88dea2afb4246d692f222048694"><div class="ttname"><a href="classtvm_1_1relay_1_1RefWriteNode.html#a0be5c88dea2afb4246d692f222048694">tvm::relay::RefWriteNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:729</div></div>
 <div class="ttc" id="classtvm_1_1BaseExprNode_html_a905dcf65204e877b6ccb977cf375f2a0"><div class="ttname"><a href="classtvm_1_1BaseExprNode.html#a905dcf65204e877b6ccb977cf375f2a0">tvm::BaseExprNode::_type_has_method_sequal_reduce</a></div><div class="ttdeci">static constexpr const bool _type_has_method_sequal_reduce</div><div class="ttdef"><b>Definition:</b> expr.h:58</div></div>
@@ -148,15 +148,15 @@ $(function() {
 <div class="ttc" id="relay_2type_8h_html"><div class="ttname"><a href="relay_2type_8h.html">type.h</a></div><div class="ttdoc">Relay typed AST nodes. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Object_html_a9e84841ca982bff376a978ade0132631"><div class="ttname"><a href="classtvm_1_1runtime_1_1Object.html#a9e84841ca982bff376a978ade0132631">tvm::runtime::Object::FDeleter</a></div><div class="ttdeci">void(* FDeleter)(Object *self)</div><div class="ttdoc">Object deleter. </div><div class="ttdef"><b>Definition:</b> object.h:173</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1CallNode_html_a28ee1603f20958c8c4fe54d8df3ac558"><div class="ttname"><a href="classtvm_1_1relay_1_1CallNode.html#a28ee1603f20958c8c4fe54d8df3ac558">tvm::relay::CallNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:324</div></div>
-<div class="ttc" id="classtvm_1_1GlobalVar_html"><div class="ttname"><a href="classtvm_1_1GlobalVar.html">tvm::GlobalVar</a></div><div class="ttdoc">Managed reference to GlobalVarNode. </div><div class="ttdef"><b>Definition:</b> expr.h:261</div></div>
+<div class="ttc" id="classtvm_1_1GlobalVar_html"><div class="ttname"><a href="classtvm_1_1GlobalVar.html">tvm::GlobalVar</a></div><div class="ttdoc">Managed reference to GlobalVarNode. </div><div class="ttdef"><b>Definition:</b> expr.h:475</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1IfNode_html_ae6a504717890d97b574b93dba29e2641"><div class="ttname"><a href="classtvm_1_1relay_1_1IfNode.html#ae6a504717890d97b574b93dba29e2641">tvm::relay::IfNode::false_branch</a></div><div class="ttdeci">Expr false_branch</div><div class="ttdoc">The expression evaluated when condition is false. </div><div class="ttdef"><b>Definition:</b> expr.h:499</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1TupleNode_html"><div class="ttname"><a href="classtvm_1_1relay_1_1TupleNode.html">tvm::relay::TupleNode</a></div><div class="ttdoc">Tuple container. </div><div class="ttdef"><b>Definition:</b> expr.h:124</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ConstantNode_html_a8103d52981ef61f22fd4437c458217ae"><div class="ttname"><a href="classtvm_1_1relay_1_1ConstantNode.html#a8103d52981ef61f22fd4437c458217ae">tvm::relay::ConstantNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const ConstantNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:90</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html_ad6ea21e1a03d972ac5cf81b80b88b2c4"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ad6ea21e1a03d972ac5cf81b80b88b2c4">tvm::RelayExprNode::virtual_device_</a></div><div class="ttdeci">ObjectRef virtual_device_</div><div class="ttdoc">The virtual device (VirtualDevice) for this node (the result of device planning). For first-order exp...</div><div class="ttdef"><b>Definition:</b> expr.h:193</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html_ad6ea21e1a03d972ac5cf81b80b88b2c4"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ad6ea21e1a03d972ac5cf81b80b88b2c4">tvm::RelayExprNode::virtual_device_</a></div><div class="ttdeci">ObjectRef virtual_device_</div><div class="ttdoc">The virtual device (VirtualDevice) for this node (the result of device planning). For first-order exp...</div><div class="ttdef"><b>Definition:</b> expr.h:407</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ConstantNode_html_ad321216b91f256cbd425b5ca251e3ed4"><div class="ttname"><a href="classtvm_1_1relay_1_1ConstantNode.html#ad321216b91f256cbd425b5ca251e3ed4">tvm::relay::ConstantNode::TVM_DECLARE_FINAL_OBJECT_INFO</a></div><div class="ttdeci">TVM_DECLARE_FINAL_OBJECT_INFO(ConstantNode, ExprNode)</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1RefReadNode_html_a3d8b18cdfa287861088148040b8de159"><div class="ttname"><a href="classtvm_1_1relay_1_1RefReadNode.html#a3d8b18cdfa287861088148040b8de159">tvm::relay::RefReadNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:676</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1CallNode_html_a918ffd45556abd8b476e8e5d701e95fc"><div class="ttname"><a href="classtvm_1_1relay_1_1CallNode.html#a918ffd45556abd8b476e8e5d701e95fc">tvm::relay::CallNode::saved_deleter_</a></div><div class="ttdeci">Object::FDeleter saved_deleter_</div><div class="ttdef"><b>Definition:</b> expr.h:286</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1Let_html"><div class="ttname"><a href="classtvm_1_1relay_1_1Let.html">tvm::relay::Let</a></div><div class="ttdef"><b>Definition:</b> expr.h:448</div></div>
@@ -193,13 +193,13 @@ $(function() {
 <div class="ttc" id="classtvm_1_1relay_1_1RefWriteNode_html_a3f9952c82acd7f413188a9920b239e37"><div class="ttname"><a href="classtvm_1_1relay_1_1RefWriteNode.html#a3f9952c82acd7f413188a9920b239e37">tvm::relay::RefWriteNode::value</a></div><div class="ttdeci">Expr value</div><div class="ttdoc">The value to write into. </div><div class="ttdef"><b>Definition:</b> expr.h:714</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1TupleGetItemNode_html_a10c16ec87c849fac7c8d4dedfaaea827"><div class="ttname"><a href="classtvm_1_1relay_1_1TupleGetItemNode.html#a10c16ec87c849fac7c8d4dedfaaea827">tvm::relay::TupleGetItemNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const TupleGetItemNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:570</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1TupleNode_html_ae9e351b42b8cde177714c98c5d353e29"><div class="ttname"><a href="classtvm_1_1relay_1_1TupleNode.html#ae9e351b42b8cde177714c98c5d353e29">tvm::relay::TupleNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:129</div></div>
-<div class="ttc" id="classtvm_1_1GlobalVarNode_html"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html">tvm::GlobalVarNode</a></div><div class="ttdoc">Global variable that lives in the top-level module. </div><div class="ttdef"><b>Definition:</b> expr.h:231</div></div>
+<div class="ttc" id="classtvm_1_1GlobalVarNode_html"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html">tvm::GlobalVarNode</a></div><div class="ttdoc">Global variable that lives in the top-level module. </div><div class="ttdef"><b>Definition:</b> expr.h:445</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1LetNode_html_a774cad50b0de361637d715f3e1ec6f10"><div class="ttname"><a href="classtvm_1_1relay_1_1LetNode.html#a774cad50b0de361637d715f3e1ec6f10">tvm::relay::LetNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:434</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:145</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html">tvm::RelayExprNode</a></div><div class="ttdoc">Base node of all non-primitive expressions. </div><div class="ttdef"><b>Definition:</b> expr.h:359</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1If_html"><div class="ttname"><a href="classtvm_1_1relay_1_1If.html">tvm::relay::If</a></div><div class="ttdef"><b>Definition:</b> expr.h:527</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1TupleGetItemNode_html_a72d32db7efcd228f67f27b632cf49cbc"><div class="ttname"><a href="classtvm_1_1relay_1_1TupleGetItemNode.html#a72d32db7efcd228f67f27b632cf49cbc">tvm::relay::TupleGetItemNode::index</a></div><div class="ttdeci">int index</div><div class="ttdoc">which value to get </div><div class="ttdef"><b>Definition:</b> expr.h:560</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ConstantNode_html_ae13d0996e9324caf76cc1fa8d7aa00f9"><div class="ttname"><a href="classtvm_1_1relay_1_1ConstantNode.html#ae13d0996e9324caf76cc1fa8d7aa00f9">tvm::relay::ConstantNode::data</a></div><div class="ttdeci">runtime::NDArray data</div><div class="ttdoc">The data of the tensor. </div><div class="ttdef"><b>Definition:</b> expr.h:75</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html_af4f200209d8efb41cbc7fc2167180727"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#af4f200209d8efb41cbc7fc2167180727">tvm::RelayExprNode::_type_child_slots</a></div><div class="ttdeci">static constexpr const uint32_t _type_child_slots</div><div class="ttdef"><b>Definition:</b> expr.h:209</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html_af4f200209d8efb41cbc7fc2167180727"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#af4f200209d8efb41cbc7fc2167180727">tvm::RelayExprNode::_type_child_slots</a></div><div class="ttdeci">static constexpr const uint32_t _type_child_slots</div><div class="ttdef"><b>Definition:</b> expr.h:423</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1IfNode_html"><div class="ttname"><a href="classtvm_1_1relay_1_1IfNode.html">tvm::relay::IfNode</a></div><div class="ttdoc">container of If </div><div class="ttdef"><b>Definition:</b> expr.h:492</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1RefReadNode_html_a70ea5750f981305f04d0c851e0b46970"><div class="ttname"><a href="classtvm_1_1relay_1_1RefReadNode.html#a70ea5750f981305f04d0c851e0b46970">tvm::relay::RefReadNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(tvm::AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:664</div></div>
 <div class="ttc" id="classtvm_1_1SHashReducer_html_a74260485bd50d1bfa52ded457a6a7777"><div class="ttname"><a href="classtvm_1_1SHashReducer.html#a74260485bd50d1bfa52ded457a6a7777">tvm::SHashReducer::DefHash</a></div><div class="ttdeci">void DefHash(const ObjectRef &amp;key) const</div><div class="ttdoc">Push hash of key to the current sequence of hash values. </div><div class="ttdef"><b>Definition:</b> structural_hash.h:179</div></div>
diff --git a/docs/reference/api/doxygen/relay_2expr__functor_8h_source.html b/docs/reference/api/doxygen/relay_2expr__functor_8h_source.html
index 344ba01b3..fc4d394a8 100644
--- a/docs/reference/api/doxygen/relay_2expr__functor_8h_source.html
+++ b/docs/reference/api/doxygen/relay_2expr__functor_8h_source.html
@@ -135,7 +135,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4_html_a98012bc26e17c61ef8269d1eda9d85ff"><div class="ttname"><a href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html#a98012bc26e17c61ef8269d1eda9d85ff">tvm::relay::ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;::operator()</a></div><div class="ttdeci">R operator()(const Expr &amp;n, Args... args)</div><div class="ttdoc">Same as call. </div><div cl [...]
 <div class="ttc" id="classtvm_1_1Op_html_ac848a0ad6a19a5fef7df4fdcb8e5f24f"><div class="ttname"><a href="classtvm_1_1Op.html#ac848a0ad6a19a5fef7df4fdcb8e5f24f">tvm::Op::Get</a></div><div class="ttdeci">static const Op &amp; Get(const String &amp;op_name)</div><div class="ttdoc">Get an Op for a given operator name. Will raise an error if the op has not been registered. </div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ExprRewriter_html_a867da2f78446c33e201da79e9e1a0b2e"><div class="ttname"><a href="classtvm_1_1relay_1_1ExprRewriter.html#a867da2f78446c33e201da79e9e1a0b2e">tvm::relay::ExprRewriter::Rewrite_</a></div><div class="ttdeci">virtual Expr Rewrite_(const OpNode *pre, const Expr &amp;post)</div><div class="ttdef"><b>Definition:</b> expr_functor.h:373</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1MixedModeMutator_html_a81d6c2593e361659ed2d0bea78a8f58a"><div class="ttname"><a href="classtvm_1_1relay_1_1MixedModeMutator.html#a81d6c2593e361659ed2d0bea78a8f58a">tvm::relay::MixedModeMutator::pre_</a></div><div class="ttdeci">bool pre_</div><div class="ttdef"><b>Definition:</b> expr_functor.h:306</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1v__info_html_a00cc81b690617d77cbc6cfb500fde1ee"><div class="ttname"><a href="structtvm_1_1relay_1_1v__info.html#a00cc81b690617d77cbc6cfb500fde1ee">tvm::relay::v_info::v_info</a></div><div class="ttdeci">v_info(Expr node_)</div><div class="ttdef"><b>Definition:</b> expr_functor.h:427</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1v__info_html"><div class="ttname"><a href="structtvm_1_1relay_1_1v__info.html">tvm::relay::v_info</a></div><div class="ttdoc">A struct to keep info of traversed expr in ExpandDataflow function. </div><div class="ttdef"><b>Definition:</b> expr_functor.h:426</div></div>
@@ -168,7 +168,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1relay_1_1ExprRewriter_html_a802445f1b6020e3397ea2824b98d4747"><div class="ttname"><a href="classtvm_1_1relay_1_1ExprRewriter.html#a802445f1b6020e3397ea2824b98d4747">tvm::relay::ExprRewriter::Rewrite_</a></div><div class="ttdeci">virtual Expr Rewrite_(const VarNode *pre, const Expr &amp;post)</div><div class="ttdef"><b>Definition:</b> expr_functor.h:365</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ExprMutator_html_ab8d4855c77e09d2274716ca2bbcf0a86"><div class="ttname"><a href="classtvm_1_1relay_1_1ExprMutator.html#ab8d4855c77e09d2274716ca2bbcf0a86">tvm::relay::ExprMutator::Mutate</a></div><div class="ttdeci">Expr Mutate(const Expr &amp;expr)</div><div class="ttdoc">Mutate is alias for VisitExpr. </div><div class="ttdef"><b>Definition:</b> expr_functor.h:190</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ExprMutator_html_a605c320fa66c769991738380bc8a69c8"><div class="ttname"><a href="classtvm_1_1relay_1_1ExprMutator.html#a605c320fa66c769991738380bc8a69c8">tvm::relay::ExprMutator::memo_</a></div><div class="ttdeci">std::unordered_map&lt; Expr, Expr, ObjectPtrHash, ObjectPtrEqual &gt; memo_</div><div class="ttdoc">Internal map used for memoization. </div><div class="ttdef"><b>Definition:</b> expr_functor.h:221</div></div>
-<div class="ttc" id="classtvm_1_1GlobalVarNode_html"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html">tvm::GlobalVarNode</a></div><div class="ttdoc">Global variable that lives in the top-level module. </div><div class="ttdef"><b>Definition:</b> expr.h:231</div></div>
+<div class="ttc" id="classtvm_1_1GlobalVarNode_html"><div class="ttname"><a href="classtvm_1_1GlobalVarNode.html">tvm::GlobalVarNode</a></div><div class="ttdoc">Global variable that lives in the top-level module. </div><div class="ttdef"><b>Definition:</b> expr.h:445</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a2d76fa1fb628ff276a284e61123589c5"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a2d76fa1fb628ff276a284e61123589c5">tvm::runtime::ObjectRef::as</a></div><div class="ttdeci">const ObjectType * as() const</div><div class="ttdoc">Try to downcast the internal Object to a raw pointer of a corresponding type. </div><div class="ttdef"><b>Definition:</b> object.h:865</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4_html_a38bcce770bb5dc94f72996d6a1da273a"><div class="ttname"><a href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html#a38bcce770bb5dc94f72996d6a1da273a">tvm::relay::ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;::VisitExpr_</a></div><div class="ttdeci">virtual R VisitExpr_(const ConstantNode *op, Args... args)</div><div class="ttdef"><b>Definition: [...]
 <div class="ttc" id="classtvm_1_1relay_1_1Clause_html"><div class="ttname"><a href="classtvm_1_1relay_1_1Clause.html">tvm::relay::Clause</a></div><div class="ttdef"><b>Definition:</b> adt.h:253</div></div>
diff --git a/docs/reference/api/doxygen/relay_2feature_8h_source.html b/docs/reference/api/doxygen/relay_2feature_8h_source.html
index 32d875653..d8d97d32d 100644
--- a/docs/reference/api/doxygen/relay_2feature_8h_source.html
+++ b/docs/reference/api/doxygen/relay_2feature_8h_source.html
@@ -92,7 +92,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1relay_html_a926750aeda977340aa82bceac863fc9ea53a8eba57c4a3abd37b4c96fb98bf61b"><div class="ttname"><a href="namespacetvm_1_1relay.html#a926750aeda977340aa82bceac863fc9ea53a8eba57c4a3abd37b4c96fb98bf61b">tvm::relay::fMatch</a></div><div class="ttdef"><b>Definition:</b> feature.h:52</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a62b651084b386991221bc32c020cbef5"><div class="ttname"><a href="namespacetvm_1_1relay.html#a62b651084b386991221bc32c020cbef5">tvm::relay::DetectFeature</a></div><div class="ttdeci">FeatureSet DetectFeature(const RelayExpr &amp;expr)</div><div class="ttdoc">Calculate the feature of the program. </div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a926750aeda977340aa82bceac863fc9ea42a71e7934e2efe344713b17c426eecd"><div class="ttname"><a href="namespacetvm_1_1relay.html#a926750aeda977340aa82bceac863fc9ea42a71e7934e2efe344713b17c426eecd">tvm::relay::fVar</a></div><div class="ttdef"><b>Definition:</b> feature.h:38</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1FeatureSet_html_abbedfbde4f544c66e6129181b3c29c2a"><div class="ttname"><a href="classtvm_1_1relay_1_1FeatureSet.html#abbedfbde4f544c66e6129181b3c29c2a">tvm::relay::FeatureSet::All</a></div><div class="ttdeci">static FeatureSet All()</div><div class="ttdoc">A set that contain all the Feature. </div><div class="ttdef"><b>Definition:</b> feature.h:84</div></div>
 <div class="ttc" id="classtvm_1_1IRModule_html"><div class="ttname"><a href="classtvm_1_1IRModule.html">tvm::IRModule</a></div><div class="ttdoc">Managed reference class to IRModuleNode. </div><div class="ttdef"><b>Definition:</b> module.h:360</div></div>
@@ -107,7 +107,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1relay_html_a926750aeda977340aa82bceac863fc9ea07ce432e7b4b4c044af03e4dbd210ba4"><div class="ttname"><a href="namespacetvm_1_1relay.html#a926750aeda977340aa82bceac863fc9ea07ce432e7b4b4c044af03e4dbd210ba4">tvm::relay::fLet</a></div><div class="ttdef"><b>Definition:</b> feature.h:46</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_aaa95d3ad68932ab206efbe0a326db6a2"><div class="ttname"><a href="namespacetvm_1_1topi.html#aaa95d3ad68932ab206efbe0a326db6a2">tvm::topi::mod</a></div><div class="ttdeci">tvm::PrimExpr mod(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:290</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1FeatureSet_html_a7fe9317883438c23defc65d12532387a"><div class="ttname"><a href="classtvm_1_1relay_1_1FeatureSet.html#a7fe9317883438c23defc65d12532387a">tvm::relay::FeatureSet::FeatureSet</a></div><div class="ttdeci">FeatureSet(Feature ft)</div><div class="ttdoc">A singleton set containing a single Feature. </div><div class="ttdef"><b>Definition:</b> feature.h:68</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/relay_2function_8h_source.html b/docs/reference/api/doxygen/relay_2function_8h_source.html
index ff7ffc7d0..bb250def1 100644
--- a/docs/reference/api/doxygen/relay_2function_8h_source.html
+++ b/docs/reference/api/doxygen/relay_2function_8h_source.html
@@ -90,13 +90,13 @@ $(function() {
 <div class="ttc" id="classtvm_1_1Span_html"><div class="ttname"><a href="classtvm_1_1Span.html">tvm::Span</a></div><div class="ttdef"><b>Definition:</b> span.h:115</div></div>
 <div class="ttc" id="classtvm_1_1BaseExprNode_html_ae3a0760a9f8b1379bc86f13e8bb3a22e"><div class="ttname"><a href="classtvm_1_1BaseExprNode.html#ae3a0760a9f8b1379bc86f13e8bb3a22e">tvm::BaseExprNode::span</a></div><div class="ttdeci">Span span</div><div class="ttdoc">Span that points to the original source code. Reserved debug information. </div><div class="ttdef"><b>Definition:</b> expr.h:55</div></div>
 <div class="ttc" id="classtvm_1_1SHashReducer_1_1Handler_html_a8f9a489881fc55552f13a58313a863cf"><div class="ttname"><a href="classtvm_1_1SHashReducer_1_1Handler.html#a8f9a489881fc55552f13a58313a863cf">tvm::SHashReducer::Handler::MarkGraphNode</a></div><div class="ttdeci">virtual void MarkGraphNode()=0</div><div class="ttdoc">Mark current comparison as graph node in hashing. Graph node hash will depends on the graph structure...</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html_ae30ca49a8b84288fbc21d5f312f02929"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ae30ca49a8b84288fbc21d5f312f02929">tvm::RelayExprNode::checked_type_</a></div><div class="ttdeci">Type checked_type_</div><div class="ttdoc">Stores the result of type inference(type checking). </div><div class="ttdef"><b>Definition:</b> expr.h:153</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html_ae30ca49a8b84288fbc21d5f312f02929"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ae30ca49a8b84288fbc21d5f312f02929">tvm::RelayExprNode::checked_type_</a></div><div class="ttdeci">Type checked_type_</div><div class="ttdoc">Stores the result of type inference(type checking). </div><div class="ttdef"><b>Definition:</b> expr.h:367</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1FunctionNode_html"><div class="ttname"><a href="classtvm_1_1relay_1_1FunctionNode.html">tvm::relay::FunctionNode</a></div><div class="ttdoc">Relay Function container. </div><div class="ttdef"><b>Definition:</b> function.h:39</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_1_1attr_html_afe7f6dd3b19c62aced68364ae2414aae"><div class="ttname"><a href="namespacetvm_1_1relay_1_1attr.html#afe7f6dd3b19c62aced68364ae2414aae">tvm::relay::attr::kPartitionedFromPattern</a></div><div class="ttdeci">constexpr const char * kPartitionedFromPattern</div><div class="ttdoc">Indicate the function was created by the Pattern Partitioning Pass. </div><div class="ttdef"><b>Definition:</b> function.h:195</div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html_ad6ea21e1a03d972ac5cf81b80b88b2c4"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ad6ea21e1a03d972ac5cf81b80b88b2c4">tvm::RelayExprNode::virtual_device_</a></div><div class="ttdeci">ObjectRef virtual_device_</div><div class="ttdoc">The virtual device (VirtualDevice) for this node (the result of device planning). For first-order exp...</div><div class="ttdef"><b>Definition:</b> expr.h:193</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html_ad6ea21e1a03d972ac5cf81b80b88b2c4"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ad6ea21e1a03d972ac5cf81b80b88b2c4">tvm::RelayExprNode::virtual_device_</a></div><div class="ttdeci">ObjectRef virtual_device_</div><div class="ttdoc">The virtual device (VirtualDevice) for this node (the result of device planning). For first-order exp...</div><div class="ttdef"><b>Definition:</b> expr.h:407</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_1_1attr_html_a6a9fb19174e7077b75478119469fed8d"><div class="ttname"><a href="namespacetvm_1_1relay_1_1attr.html#a6a9fb19174e7077b75478119469fed8d">tvm::relay::attr::kExtern</a></div><div class="ttdeci">constexpr const char * kExtern</div><div class="ttdoc">Mark the function as externally implemented, ie bound in a runtime::Module within the IRModule&amp;#39;s &quot;ext...</div><div class="ttdef"><b>Definition:</b> function.h:173</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="object_8h_html_ac6e7295a4999e2c8e4a2c990beca887a"><div class="ttname"><a href="object_8h.html#ac6e7295a4999e2c8e4a2c990beca887a">TVM_DEFINE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:713</div></div>
 <div class="ttc" id="classtvm_1_1relay_1_1FunctionNode_html_a689c5078fb501798aa1d57d1825e3b5a"><div class="ttname"><a href="classtvm_1_1relay_1_1FunctionNode.html#a689c5078fb501798aa1d57d1825e3b5a">tvm::relay::FunctionNode::params</a></div><div class="ttdeci">tvm::Array&lt; Var &gt; params</div><div class="ttdoc">Function parameters. </div><div class="ttdef"><b>Definition:</b> function.h:42</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_1_1attr_html_a6b63f68ed3b4bcf00f1ad58b6a881a4b"><div class="ttname"><a href="namespacetvm_1_1relay_1_1attr.html#a6b63f68ed3b4bcf00f1ad58b6a881a4b">tvm::relay::attr::kReshapeOnly</a></div><div class="ttdeci">constexpr const char * kReshapeOnly</div><div class="ttdoc">Mark the function as only composed of reshape operations. </div><div class="ttdef"><b>Definition:</b> function.h:197</div></div>
diff --git a/docs/reference/api/doxygen/relay_2op__attr__types_8h_source.html b/docs/reference/api/doxygen/relay_2op__attr__types_8h_source.html
index d5654145d..4fde858f2 100644
--- a/docs/reference/api/doxygen/relay_2op__attr__types_8h_source.html
+++ b/docs/reference/api/doxygen/relay_2op__attr__types_8h_source.html
@@ -83,7 +83,7 @@ $(function() {
 <div class="ttc" id="relay_2type_8h_html"><div class="ttname"><a href="relay_2type_8h.html">type.h</a></div><div class="ttdoc">Relay typed AST nodes. </div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_ab5f4d382bf1bee69c3e484ea6c837578ad282ac05b650598c3bfb559dc62a0738"><div class="ttname"><a href="namespacetvm_1_1relay.html#ab5f4d382bf1bee69c3e484ea6c837578ad282ac05b650598c3bfb559dc62a0738">tvm::relay::kCommReduce</a></div><div class="ttdef"><b>Definition:</b> op_attr_types.h:56</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc.html">tvm::runtime::TypedPackedFunc&lt; Array&lt; te::Tensor &gt;(const Attrs &amp;attrs, const Array&lt; te::Tensor &gt; &amp;inputs, const Type &amp;out_type)&gt;</a></div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_afb8a8d4dd43830d4ce7d566abcd1c450"><div class="ttname"><a href="namespacetvm_1_1relay.html#afb8a8d4dd43830d4ce7d566abcd1c450">tvm::relay::TOpIsStateful</a></div><div class="ttdeci">bool TOpIsStateful</div><div class="ttdoc">Whether operator is stateful or contain internal state. </div><div class="ttdef"><b>Definition:</b> op_attr_types.h:78</div></div>
 <div class="ttc" id="classtvm_1_1Target_html"><div class="ttname"><a href="classtvm_1_1Target.html">tvm::Target</a></div><div class="ttdoc">Managed reference class to TargetNode. </div><div class="ttdef"><b>Definition:</b> target.h:141</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html">tvm::runtime::ObjectRef</a></div><div class="ttdoc">Base class of all object reference. </div><div class="ttdef"><b>Definition:</b> object.h:511</div></div>
diff --git a/docs/reference/api/doxygen/relay_2transform_8h_source.html b/docs/reference/api/doxygen/relay_2transform_8h_source.html
index d4a0c19d0..1cd4176cc 100644
--- a/docs/reference/api/doxygen/relay_2transform_8h_source.html
+++ b/docs/reference/api/doxygen/relay_2transform_8h_source.html
@@ -114,7 +114,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TypedPackedFunc_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TypedPackedFunc.html">tvm::runtime::TypedPackedFunc</a></div><div class="ttdoc">Please refer to TypedPackedFunc&lt;R(Args..)&gt;. </div><div class="ttdef"><b>Definition:</b> packed_func.h:60</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_1_1transform_html_a473c64deec00aaf5636cc12f844b1e42"><div class="ttname"><a href="namespacetvm_1_1relay_1_1transform.html#a473c64deec00aaf5636cc12f844b1e42">tvm::relay::transform::ManifestAlloc</a></div><div class="ttdeci">Pass ManifestAlloc(VirtualDevice cpu_virtual_device)</div><div class="ttdoc">A pass for manifesting explicit memory allocations and rewriting specific dialects. </div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_1_1transform_html_a40b49e82dd73b05bfac14040ee2fbdee"><div class="ttname"><a href="namespacetvm_1_1relay_1_1transform.html#a40b49e82dd73b05bfac14040ee2fbdee">tvm::relay::transform::ConvertLayout</a></div><div class="ttdeci">Pass ConvertLayout(const Map&lt; String, Array&lt; String &gt;&gt; &amp;desired_layouts)</div><div class="ttdoc">Given a dest layout, this pass transforms the expr such that most of the ops input data layout is cha...</div></div>
 <div class="ttc" id="namespacetvm_1_1relay_1_1transform_html_a2425d757b896168a109498e8d34ba960"><div class="ttname"><a href="namespacetvm_1_1relay_1_1transform.html#a2425d757b896168a109498e8d34ba960">tvm::relay::transform::SplitArgs</a></div><div class="ttdeci">Pass SplitArgs(int max_function_args)</div><div class="ttdoc">Split function with huge number of arguments to smaller pieces. </div></div>
 <div class="ttc" id="namespacetvm_1_1relay_1_1transform_html_a03b053f3d99d5c420ddc8492e6b987bf"><div class="ttname"><a href="namespacetvm_1_1relay_1_1transform.html#a03b053f3d99d5c420ddc8492e6b987bf">tvm::relay::transform::RewriteAnnotatedOps</a></div><div class="ttdeci">Pass RewriteAnnotatedOps(int fallback_device)</div><div class="ttdoc">Rewrite the annotated program. </div></div>
diff --git a/docs/reference/api/doxygen/search/all_10.js b/docs/reference/api/doxygen/search/all_10.js
index b7fbb5541..021821479 100644
--- a/docs/reference/api/doxygen/search/all_10.js
+++ b/docs/reference/api/doxygen/search/all_10.js
@@ -43,7 +43,7 @@ var searchData=
   ['operationmap',['OperationMap',['../classtvm_1_1auto__scheduler_1_1AccessAnalyzerNode.html#ad1678244c48ee2916fdbf0c3d7132384',1,'tvm::auto_scheduler::AccessAnalyzerNode']]],
   ['operationnode',['OperationNode',['../classtvm_1_1te_1_1OperationNode.html',1,'tvm::te']]],
   ['operator_20_26',['operator &amp;',['../namespacetvm.html#a2a1269a38e7e3621eb2906a47157106a',1,'tvm::operator &amp;(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a6a02ef06e951b1d09acc96c2a4149ae3',1,'tvm::operator &amp;(const PrimExpr &amp;a, int b)'],['../namespacetvm.html#a88ce3d8cef61f4c1ded9c5379b03c352',1,'tvm::operator &amp;(int a, const PrimExpr &amp;b)'],['../namespacetvm_1_1topi.html#a69bc76d169f422bffc6e0ee84afcea87',1,'tvm::topi::operator &amp;(const tvm::te::Tensor &amp [...]
-  ['operator_20_26_26',['operator &amp;&amp;',['../namespacetvm.html#a242b37bc39f3fc56d29e36f916cc1483',1,'tvm::operator &amp;&amp;(const Bool &amp;a, bool b)'],['../namespacetvm.html#a313252634ee340fcb374f25699832b5f',1,'tvm::operator &amp;&amp;(bool a, const Bool &amp;b)'],['../namespacetvm.html#a3d58c54be9c168b77bd3c9b6c3b962d3',1,'tvm::operator &amp;&amp;(const Bool &amp;a, const Bool &amp;b)'],['../namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8',1,'tvm::operator &amp;&amp;(Prim [...]
+  ['operator_20_26_26',['operator &amp;&amp;',['../namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8',1,'tvm::operator &amp;&amp;(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a242b37bc39f3fc56d29e36f916cc1483',1,'tvm::operator &amp;&amp;(const Bool &amp;a, bool b)'],['../namespacetvm.html#a313252634ee340fcb374f25699832b5f',1,'tvm::operator &amp;&amp;(bool a, const Bool &amp;b)'],['../namespacetvm.html#a3d58c54be9c168b77bd3c9b6c3b962d3',1,'tvm::operator &amp;&amp;(const Bool &amp;a, [...]
   ['operator_20array_3c_20integer_20_3e',['operator Array&lt; Integer &gt;',['../classtvm_1_1relay_1_1FeatureSet.html#afc19754f86ffa1ffc63f81cc2fce1959',1,'tvm::relay::FeatureSet']]],
   ['operator_20bool',['operator bool',['../classtvm_1_1Bool.html#a01580c15a0d6db0ae60dfaa39f2aa044',1,'tvm::Bool::operator bool()'],['../classtvm_1_1runtime_1_1Optional.html#a3b2e44b31ccfcb1e8f13fac3aefca792',1,'tvm::runtime::Optional::operator bool()'],['../classtvm_1_1runtime_1_1ObjectPtr.html#a3985a6331cef665602fc3e14b59e7a0c',1,'tvm::runtime::ObjectPtr::operator bool()'],['../classtvm_1_1runtime_1_1TVMPODValue__.html#a10b749a19ee7b11803b94377aee0e8b5',1,'tvm::runtime::TVMPODValue_::o [...]
   ['operator_20datatype',['operator DataType',['../classtvm_1_1runtime_1_1TVMArgValue.html#a0b14124450fc73eb7f6cee8cedf1e32f',1,'tvm::runtime::TVMArgValue::operator DataType()'],['../classtvm_1_1runtime_1_1TVMMovableArgValue__.html#ab68a86a8394d26c9669118312504605c',1,'tvm::runtime::TVMMovableArgValue_::operator DataType()'],['../classtvm_1_1runtime_1_1TVMRetValue.html#a8f597329f3b49b7b89b2d327d1067130',1,'tvm::runtime::TVMRetValue::operator DataType()']]],
@@ -64,34 +64,34 @@ var searchData=
   ['operator_20typedpackedfunc_3c_20ftype_20_3e',['operator TypedPackedFunc&lt; FType &gt;',['../classtvm_1_1runtime_1_1TVMArgValue.html#af345627966162cdc4f9bd366747f0833',1,'tvm::runtime::TVMArgValue::operator TypedPackedFunc&lt; FType &gt;()'],['../classtvm_1_1runtime_1_1TVMMovableArgValue__.html#a6d798256cda612b1d5ac89b4343fbf34',1,'tvm::runtime::TVMMovableArgValue_::operator TypedPackedFunc&lt; FType &gt;()'],['../classtvm_1_1runtime_1_1TVMRetValue.html#ad8b189915c5f3cb57fe174d87072d [...]
   ['operator_20uint64_5ft',['operator uint64_t',['../classtvm_1_1runtime_1_1TVMPODValue__.html#a6eb8d1f7dcce5b2a76322956fdd3fb8c',1,'tvm::runtime::TVMPODValue_']]],
   ['operator_20void_20_2a',['operator void *',['../classtvm_1_1runtime_1_1TVMPODValue__.html#ada67522073183c9a1a9d4afb847afc4c',1,'tvm::runtime::TVMPODValue_']]],
-  ['operator_21',['operator!',['../classtvm_1_1Bool.html#a00a5153c31270c6ec308a516cd46f7fb',1,'tvm::Bool::operator!()'],['../namespacetvm_1_1te.html#a27eae569b04a2d43a513b654b6d2b98c',1,'tvm::te::operator!()'],['../namespacetvm.html#ab354bf1270121abea71fade83f13b0b0',1,'tvm::operator!()']]],
-  ['operator_21_3d',['operator!=',['../classtvm_1_1Integer.html#a49869e0f187d66dcd00568d3e953fc04',1,'tvm::Integer::operator!=(int other) const'],['../classtvm_1_1Integer.html#a1e099442e256b242fe8e2e2a49c1dc33',1,'tvm::Integer::operator!=(Enum other) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#a6e54667aa71ae41d33c22d6bd3973404',1,'tvm::runtime::IterAdapter::operator!=()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a2a049030ff85513916a05924bf519d2c',1,'tvm::runtime::Reve [...]
+  ['operator_21',['operator!',['../classtvm_1_1Bool.html#a00a5153c31270c6ec308a516cd46f7fb',1,'tvm::Bool::operator!()'],['../namespacetvm.html#ab354bf1270121abea71fade83f13b0b0',1,'tvm::operator!()'],['../namespacetvm_1_1te.html#a27eae569b04a2d43a513b654b6d2b98c',1,'tvm::te::operator!()']]],
+  ['operator_21_3d',['operator!=',['../classtvm_1_1Integer.html#a49869e0f187d66dcd00568d3e953fc04',1,'tvm::Integer::operator!=(int other) const'],['../classtvm_1_1Integer.html#a1e099442e256b242fe8e2e2a49c1dc33',1,'tvm::Integer::operator!=(Enum other) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#a6e54667aa71ae41d33c22d6bd3973404',1,'tvm::runtime::IterAdapter::operator!=()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a2a049030ff85513916a05924bf519d2c',1,'tvm::runtime::Reve [...]
   ['operator_25',['operator%',['../namespacetvm.html#ab25738e50b37cd07b2d171ca74ba9321',1,'tvm']]],
   ['operator_26_26',['operator&amp;&amp;',['../namespacetvm_1_1te.html#a45c2369417cfb6d8f20c400bcb1a693c',1,'tvm::te::operator&amp;&amp;(const Tensor::Slice &amp;a, const T &amp;b)'],['../namespacetvm_1_1te.html#a72b7fda8c6d5b169296d3d12522cb40f',1,'tvm::te::operator&amp;&amp;(const T &amp;a, const Tensor::Slice &amp;b)'],['../namespacetvm_1_1te.html#a8f10bf673f27ec755a0f284f3115c2ea',1,'tvm::te::operator&amp;&amp;(const Tensor::Slice &amp;a, const Tensor::Slice &amp;b)']]],
   ['operator_28_29',['operator()',['../classtvm_1_1arith_1_1ConstIntBoundAnalyzer.html#add50f8cc72f7fbf50e456d31f045d24c',1,'tvm::arith::ConstIntBoundAnalyzer::operator()(const PrimExpr &amp;expr) const'],['../classtvm_1_1arith_1_1ConstIntBoundAnalyzer.html#aa24f8d56b331e1b9c6ed315877c4e0a2',1,'tvm::arith::ConstIntBoundAnalyzer::operator()(const PrimExpr &amp;expr, BoundMapType *bound)'],['../classtvm_1_1arith_1_1ModularSetAnalyzer.html#a57c3c62ebc821f5f01bc2b43f06488da',1,'tvm::arith::M [...]
-  ['operator_2a',['operator*',['../classtvm_1_1relay_1_1DFPattern.html#a000e12a821910cb9bfb5c5df0b9f21ae',1,'tvm::relay::DFPattern::operator*()'],['../classtvm_1_1runtime_1_1IterAdapter.html#a4869a61f3e598fcbf8cdb3c14d66f859',1,'tvm::runtime::IterAdapter::operator*()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a0b4fc047041f8e1cb3454ee89120444a',1,'tvm::runtime::ReverseIterAdapter::operator*()'],['../classtvm_1_1runtime_1_1MapNode_1_1iterator.html#ac09b2cd5327e5102ab373b482530f1 [...]
+  ['operator_2a',['operator*',['../classtvm_1_1relay_1_1DFPattern.html#a000e12a821910cb9bfb5c5df0b9f21ae',1,'tvm::relay::DFPattern::operator*()'],['../classtvm_1_1runtime_1_1IterAdapter.html#a4869a61f3e598fcbf8cdb3c14d66f859',1,'tvm::runtime::IterAdapter::operator*()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a0b4fc047041f8e1cb3454ee89120444a',1,'tvm::runtime::ReverseIterAdapter::operator*()'],['../classtvm_1_1runtime_1_1MapNode_1_1iterator.html#ac09b2cd5327e5102ab373b482530f1 [...]
   ['operator_2a_3d',['operator*=',['../namespacetvm.html#a9233d7b436ae52ac070442ba19caed18',1,'tvm']]],
-  ['operator_2b',['operator+',['../classtvm_1_1arith_1_1IntGroupBounds.html#a5de071b5c736097d25fa1a508b06f565',1,'tvm::arith::IntGroupBounds::operator+()'],['../classtvm_1_1arith_1_1IntConstraintsTransform.html#a0a1965c2707e06b47776e574a9597db9',1,'tvm::arith::IntConstraintsTransform::operator+()'],['../classtvm_1_1relay_1_1DFPattern.html#a9074ec635ca5c4ffe0fa7b6f7d7fe394',1,'tvm::relay::DFPattern::operator+()'],['../classtvm_1_1relay_1_1FeatureSet.html#ac2448ecf37a176ad612a9765927ed4fa' [...]
+  ['operator_2b',['operator+',['../classtvm_1_1arith_1_1IntGroupBounds.html#a5de071b5c736097d25fa1a508b06f565',1,'tvm::arith::IntGroupBounds::operator+()'],['../classtvm_1_1arith_1_1IntConstraintsTransform.html#a0a1965c2707e06b47776e574a9597db9',1,'tvm::arith::IntConstraintsTransform::operator+()'],['../classtvm_1_1relay_1_1DFPattern.html#a9074ec635ca5c4ffe0fa7b6f7d7fe394',1,'tvm::relay::DFPattern::operator+()'],['../classtvm_1_1relay_1_1FeatureSet.html#ac2448ecf37a176ad612a9765927ed4fa' [...]
   ['operator_2b_2b',['operator++',['../classtvm_1_1runtime_1_1IterAdapter.html#a40f67dd792619d7d41ac45fc0f52a0c4',1,'tvm::runtime::IterAdapter::operator++()'],['../classtvm_1_1runtime_1_1IterAdapter.html#a76971f210bcd72c05992ae57b2ac8b2e',1,'tvm::runtime::IterAdapter::operator++(int)'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a362bd28f025d28551159e46c415309bd',1,'tvm::runtime::ReverseIterAdapter::operator++()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#acb427db7d19a7 [...]
   ['operator_2b_3d',['operator+=',['../classtvm_1_1relay_1_1FeatureSet.html#a73e4e0de2e58ef67a88d27a48b3c3fc0',1,'tvm::relay::FeatureSet::operator+=()'],['../namespacetvm.html#a96061840d12f84eeecc8fae11e245242',1,'tvm::operator+=()']]],
-  ['operator_2d',['operator-',['../classtvm_1_1relay_1_1DFPattern.html#ad638d57cae9db9a1d4cd8831ab60a18b',1,'tvm::relay::DFPattern::operator-()'],['../classtvm_1_1relay_1_1FeatureSet.html#ad0412921b6fe93a2f51b3c72c03c4095',1,'tvm::relay::FeatureSet::operator-()'],['../classtvm_1_1runtime_1_1IterAdapter.html#ab5671b2b9d56c2abcfb8d3f03cf6fd9e',1,'tvm::runtime::IterAdapter::operator-(difference_type offset) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#a5bbe56d308014b49fec6374315f8d4 [...]
+  ['operator_2d',['operator-',['../classtvm_1_1relay_1_1DFPattern.html#ad638d57cae9db9a1d4cd8831ab60a18b',1,'tvm::relay::DFPattern::operator-()'],['../classtvm_1_1relay_1_1FeatureSet.html#ad0412921b6fe93a2f51b3c72c03c4095',1,'tvm::relay::FeatureSet::operator-()'],['../classtvm_1_1runtime_1_1IterAdapter.html#ab5671b2b9d56c2abcfb8d3f03cf6fd9e',1,'tvm::runtime::IterAdapter::operator-(difference_type offset) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#a5bbe56d308014b49fec6374315f8d4 [...]
   ['operator_2d_2d',['operator--',['../classtvm_1_1runtime_1_1IterAdapter.html#acb643cc554062422a4fd00bbfb3b0a6d',1,'tvm::runtime::IterAdapter::operator--()'],['../classtvm_1_1runtime_1_1IterAdapter.html#ad6a362ac9b5e17443f252f16f1c82703',1,'tvm::runtime::IterAdapter::operator--(int)'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#afa7ed1b09064bb3d6dea7ad886145346',1,'tvm::runtime::ReverseIterAdapter::operator--()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a53f1d3671d30c [...]
   ['operator_2d_3d',['operator-=',['../classtvm_1_1relay_1_1FeatureSet.html#ad90ce4d929774c0feb92c3eb56bea338',1,'tvm::relay::FeatureSet::operator-=()'],['../namespacetvm.html#a164d454c519fecaa5611c86b65469acc',1,'tvm::operator-=()']]],
   ['operator_2d_3e',['operator-&gt;',['../classtvm_1_1arith_1_1IterMapResult.html#a248e87671f3870f758244125fdbb5ffb',1,'tvm::arith::IterMapResult::operator-&gt;()'],['../classtvm_1_1auto__scheduler_1_1RandomModel.html#a337aa632ca8aedc20266ce0822b32bf9',1,'tvm::auto_scheduler::RandomModel::operator-&gt;()'],['../classtvm_1_1DiagnosticRenderer.html#a21576c617844ae035b09e6e0a6fdd461',1,'tvm::DiagnosticRenderer::operator-&gt;()'],['../classtvm_1_1DiagnosticContext.html#a8f14af7e269aa0647cd9c [...]
   ['operator_2f',['operator/',['../classtvm_1_1relay_1_1DFPattern.html#ad0839b39558941ac374c213f78ca2c81',1,'tvm::relay::DFPattern::operator/()'],['../namespacetvm.html#a18256ba1213ce5ff3cf8037a314354b7',1,'tvm::operator/(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a136427374941fbf8e50f53b1cab39e38',1,'tvm::operator/(const PrimExpr &amp;a, const TB &amp;b)']]],
   ['operator_2f_3d',['operator/=',['../namespacetvm.html#a51dc569142bf8ce8ea55f73029d3807d',1,'tvm']]],
-  ['operator_3c',['operator&lt;',['../classtvm_1_1runtime_1_1ObjectRef.html#a4744bf4a1b48f202d41b51dc5e08e6ee',1,'tvm::runtime::ObjectRef::operator&lt;()'],['../namespacetvm_1_1runtime.html#a2865dffa2fddf5eff9d7ed397563ebd6',1,'tvm::runtime::operator&lt;(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#ad5305faaeefd679da62186dab423bdab',1,'tvm::runtime::operator&lt;(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.htm [...]
-  ['operator_3c_3c',['operator&lt;&lt;',['../classtvm_1_1DiagnosticBuilder.html#aa92a3f9039d464fbefaed90b0e255e84',1,'tvm::DiagnosticBuilder::operator&lt;&lt;()'],['../structtvm_1_1ErrorBuilder.html#ad40b754d2d8992b65d0bc5b116bd3f71',1,'tvm::ErrorBuilder::operator&lt;&lt;()'],['../structtvm_1_1runtime_1_1vm_1_1Instruction.html#a7948440c8e6f670e3c782619415dc184',1,'tvm::runtime::vm::Instruction::operator&lt;&lt;()'],['../structtvm_1_1runtime_1_1vm_1_1VMFunction.html#a4dd5eae76553d1be115e7 [...]
-  ['operator_3c_3d',['operator&lt;=',['../namespacetvm_1_1runtime.html#a92428efae022d4982b2644f8960d4386',1,'tvm::runtime::operator&lt;=(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a8daf39dc422f228fae2ec11a426bab28',1,'tvm::runtime::operator&lt;=(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#a9cf2e7e67fd12d69c5bce2be881c8296',1,'tvm::runtime::operator&lt;=(const String &amp;lhs, const String &amp;rhs)'],[ [...]
+  ['operator_3c',['operator&lt;',['../classtvm_1_1runtime_1_1ObjectRef.html#a4744bf4a1b48f202d41b51dc5e08e6ee',1,'tvm::runtime::ObjectRef::operator&lt;()'],['../namespacetvm.html#a1f98476c3a413f6cdfc7b7e490f3221b',1,'tvm::operator&lt;()'],['../namespacetvm_1_1runtime.html#a2865dffa2fddf5eff9d7ed397563ebd6',1,'tvm::runtime::operator&lt;(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#ad5305faaeefd679da62186dab423bdab',1,'tvm::runtime::operator&lt;(co [...]
+  ['operator_3c_3c',['operator&lt;&lt;',['../classtvm_1_1DiagnosticBuilder.html#aa92a3f9039d464fbefaed90b0e255e84',1,'tvm::DiagnosticBuilder::operator&lt;&lt;()'],['../structtvm_1_1ErrorBuilder.html#ad40b754d2d8992b65d0bc5b116bd3f71',1,'tvm::ErrorBuilder::operator&lt;&lt;()'],['../structtvm_1_1runtime_1_1vm_1_1Instruction.html#a7948440c8e6f670e3c782619415dc184',1,'tvm::runtime::vm::Instruction::operator&lt;&lt;()'],['../structtvm_1_1runtime_1_1vm_1_1VMFunction.html#a4dd5eae76553d1be115e7 [...]
+  ['operator_3c_3d',['operator&lt;=',['../namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d',1,'tvm::operator&lt;=()'],['../namespacetvm_1_1runtime.html#a92428efae022d4982b2644f8960d4386',1,'tvm::runtime::operator&lt;=(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a8daf39dc422f228fae2ec11a426bab28',1,'tvm::runtime::operator&lt;=(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#a9cf2e7e67fd12d69c5bce2be881c82 [...]
   ['operator_3d',['operator=',['../classtvm_1_1arith_1_1Analyzer.html#a9dccc7d98b8b9465390e10436d3a9178',1,'tvm::arith::Analyzer::operator=()'],['../classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html#aab332907b9f98876f441f6403b801187',1,'tvm::TypedEnvFunc&lt; R(Args...)&gt;::operator=()'],['../classtvm_1_1Integer.html#ad538a2ae6f636b3ce38fb4162b1c2549',1,'tvm::Integer::operator=()'],['../classtvm_1_1relay_1_1PatternWildcard.html#aef430b36a4be504969829f800e126245',1,'tvm::relay::Patter [...]
-  ['operator_3d_3d',['operator==',['../classtvm_1_1Integer.html#a5b9ad6d47c7c6df5a066d58f6ba65f8e',1,'tvm::Integer::operator==(int other) const'],['../classtvm_1_1Integer.html#ad2291d037ff36f5371f6381478b3eeaf',1,'tvm::Integer::operator==(Enum other) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#ac4066242008f1e60533099615a20fdbd',1,'tvm::runtime::IterAdapter::operator==()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#ae71116873ea643de9f8ec1836cc116db',1,'tvm::runtime::Reve [...]
-  ['operator_3e',['operator&gt;',['../namespacetvm_1_1runtime.html#af7996be0c57184e8ff9e655d0d055edc',1,'tvm::runtime::operator&gt;(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a031e6c8e64cd9db11754355e3250ab4c',1,'tvm::runtime::operator&gt;(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#abf23f2e6b19c87ecf3f27a7506ddf4c0',1,'tvm::runtime::operator&gt;(const String &amp;lhs, const String &amp;rhs)'],['../nam [...]
-  ['operator_3e_3d',['operator&gt;=',['../namespacetvm_1_1runtime.html#a944029d40e689e4d2acab53ce36a5c99',1,'tvm::runtime::operator&gt;=(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a8f8694e651078c6df7c5c26898ee9f14',1,'tvm::runtime::operator&gt;=(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#af7310fb8b9944f41f8f30021d92847eb',1,'tvm::runtime::operator&gt;=(const String &amp;lhs, const String &amp;rhs)'],[ [...]
-  ['operator_3e_3e',['operator&gt;&gt;',['../namespacetvm_1_1te.html#a4a8524467a57ae005654a3f0cb816e3f',1,'tvm::te::operator&gt;&gt;(const Tensor::Slice &amp;a, const T &amp;b)'],['../namespacetvm_1_1te.html#a8705a88b943011532ff4c94c4b06c213',1,'tvm::te::operator&gt;&gt;(const T &amp;a, const Tensor::Slice &amp;b)'],['../namespacetvm_1_1te.html#a9d3d9a057d5f1a36277ac4005f38bafa',1,'tvm::te::operator&gt;&gt;(const Tensor::Slice &amp;a, const Tensor::Slice &amp;b)'],['../namespacetvm.html# [...]
+  ['operator_3d_3d',['operator==',['../classtvm_1_1Integer.html#a5b9ad6d47c7c6df5a066d58f6ba65f8e',1,'tvm::Integer::operator==(int other) const'],['../classtvm_1_1Integer.html#ad2291d037ff36f5371f6381478b3eeaf',1,'tvm::Integer::operator==(Enum other) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#ac4066242008f1e60533099615a20fdbd',1,'tvm::runtime::IterAdapter::operator==()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#ae71116873ea643de9f8ec1836cc116db',1,'tvm::runtime::Reve [...]
+  ['operator_3e',['operator&gt;',['../namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041',1,'tvm::operator&gt;()'],['../namespacetvm_1_1runtime.html#af7996be0c57184e8ff9e655d0d055edc',1,'tvm::runtime::operator&gt;(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a031e6c8e64cd9db11754355e3250ab4c',1,'tvm::runtime::operator&gt;(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#abf23f2e6b19c87ecf3f27a7506ddf4c0',1,' [...]
+  ['operator_3e_3d',['operator&gt;=',['../namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf',1,'tvm::operator&gt;=()'],['../namespacetvm_1_1runtime.html#a944029d40e689e4d2acab53ce36a5c99',1,'tvm::runtime::operator&gt;=(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a8f8694e651078c6df7c5c26898ee9f14',1,'tvm::runtime::operator&gt;=(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#af7310fb8b9944f41f8f30021d92847 [...]
+  ['operator_3e_3e',['operator&gt;&gt;',['../namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41',1,'tvm::operator&gt;&gt;()'],['../namespacetvm_1_1te.html#a4a8524467a57ae005654a3f0cb816e3f',1,'tvm::te::operator&gt;&gt;(const Tensor::Slice &amp;a, const T &amp;b)'],['../namespacetvm_1_1te.html#a8705a88b943011532ff4c94c4b06c213',1,'tvm::te::operator&gt;&gt;(const T &amp;a, const Tensor::Slice &amp;b)'],['../namespacetvm_1_1te.html#a9d3d9a057d5f1a36277ac4005f38bafa',1,'tvm::te::operator&gt [...]
   ['operator_5b_5d',['operator[]',['../classtvm_1_1AttrRegistryMapContainerMap.html#a713c3d1884423e3e67e3fdaef2566925',1,'tvm::AttrRegistryMapContainerMap::operator[]()'],['../classtvm_1_1AttrRegistryMap.html#ae4152d6da01b645eeff90fbaeac7c14f',1,'tvm::AttrRegistryMap::operator[]()'],['../classtvm_1_1runtime_1_1ADT.html#a88cf389012aad0e129c84c056ae546fc',1,'tvm::runtime::ADT::operator[]()'],['../classtvm_1_1runtime_1_1Array.html#ac84683f61368eb10e26e72a0097cccd7',1,'tvm::runtime::Array::o [...]
   ['operator_5e',['operator^',['../namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f',1,'tvm::operator^(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a82dc2fe21e7a64be5a1b11c2a8775d31',1,'tvm::operator^(const PrimExpr &amp;a, int b)'],['../namespacetvm.html#a6f638564e5e4d1023096523800f2579e',1,'tvm::operator^(int a, const PrimExpr &amp;b)'],['../namespacetvm_1_1topi.html#abef6b4a192138a38c651c2c347e60263',1,'tvm::topi::operator^(const tvm::te::Tensor &amp;A, const tvm::te::Tensor &a [...]
   ['operator_7c',['operator|',['../namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d',1,'tvm::operator|(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a7c7fc3c45e6f6b52b2a1064deabd0797',1,'tvm::operator|(const PrimExpr &amp;a, int b)'],['../namespacetvm.html#ad5ba71021b167b0a6ca2138b2c8bbace',1,'tvm::operator|(int a, const PrimExpr &amp;b)'],['../namespacetvm_1_1topi.html#a0e3d0c113031f4b209febd097e426e06',1,'tvm::topi::operator|(const tvm::te::Tensor &amp;A, const tvm::te::Tensor &a [...]
-  ['operator_7c_7c',['operator||',['../classtvm_1_1relay_1_1DFPattern.html#ab8caf2bf80291b17922190cab174e11d',1,'tvm::relay::DFPattern::operator||()'],['../namespacetvm.html#a002710a4652156a57495e10a09b5d002',1,'tvm::operator||(const Bool &amp;a, bool b)'],['../namespacetvm.html#a4c8c1c1c248859ce0d20f614e18a9524',1,'tvm::operator||(bool a, const Bool &amp;b)'],['../namespacetvm.html#ae4ef6ceffc5778d734c2ddfc72020d60',1,'tvm::operator||(const Bool &amp;a, const Bool &amp;b)'],['../namespa [...]
+  ['operator_7c_7c',['operator||',['../classtvm_1_1relay_1_1DFPattern.html#ab8caf2bf80291b17922190cab174e11d',1,'tvm::relay::DFPattern::operator||()'],['../namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6',1,'tvm::operator||(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a002710a4652156a57495e10a09b5d002',1,'tvm::operator||(const Bool &amp;a, bool b)'],['../namespacetvm.html#a4c8c1c1c248859ce0d20f614e18a9524',1,'tvm::operator||(bool a, const Bool &amp;b)'],['../namespacetvm.html#ae4 [...]
   ['operator_7e',['operator~',['../namespacetvm.html#a354b9954ff25dd819a51d856fdd38827',1,'tvm']]],
   ['opimplementation',['OpImplementation',['../classtvm_1_1relay_1_1OpImplementation.html',1,'tvm::relay']]],
   ['opimplementationnode',['OpImplementationNode',['../classtvm_1_1relay_1_1OpImplementationNode.html',1,'tvm::relay']]],
diff --git a/docs/reference/api/doxygen/search/all_11.js b/docs/reference/api/doxygen/search/all_11.js
index ea6635948..8c5da8ad1 100644
--- a/docs/reference/api/doxygen/search/all_11.js
+++ b/docs/reference/api/doxygen/search/all_11.js
@@ -21,7 +21,7 @@ var searchData=
   ['packetfieldsizebytes',['PacketFieldSizeBytes',['../classtvm_1_1runtime_1_1micro__rpc_1_1PacketFieldSizeBytes.html',1,'tvm::runtime::micro_rpc']]],
   ['packimportstoc',['PackImportsToC',['../namespacetvm_1_1codegen.html#abf02059ebadcdb8bbbe5c840b646d67b',1,'tvm::codegen']]],
   ['packimportstollvm',['PackImportsToLLVM',['../namespacetvm_1_1codegen.html#ab2cd2a65bac4b26427a8ca0abe4e0bd6',1,'tvm::codegen']]],
-  ['pad',['Pad',['../namespacetvm_1_1topi.html#a97c798d0a0ec20a95d351618b83d5121',1,'tvm::topi::Pad(const Array&lt; PrimExpr &gt; shape, int odim)'],['../namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5',1,'tvm::topi::pad(const tvm::te::Tensor &amp;t, const tvm::Array&lt; tvm::PrimExpr &gt; &amp;pad_before, tvm::Array&lt; tvm::PrimExpr &gt; pad_after=tvm::Array&lt; tvm::PrimExpr &gt;(), PrimExpr pad_value=PrimExpr(), std::string name=&quot;T_pad&quot;, std::string tag=kElement [...]
+  ['pad',['pad',['../namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5',1,'tvm::topi::pad(const tvm::te::Tensor &amp;t, const tvm::Array&lt; tvm::PrimExpr &gt; &amp;pad_before, tvm::Array&lt; tvm::PrimExpr &gt; pad_after=tvm::Array&lt; tvm::PrimExpr &gt;(), PrimExpr pad_value=PrimExpr(), std::string name=&quot;T_pad&quot;, std::string tag=kElementWise, std::string pad_mode=&quot;constant&quot;, const Array&lt; PrimExpr &gt; *dyn_output_shape=nullptr)'],['../namespacetvm_1_1topi [...]
   ['pad_5fmode',['pad_mode',['../structtvm_1_1relay_1_1PadAttrs.html#a5b524c3add781cd2da894e81553079f8',1,'tvm::relay::PadAttrs']]],
   ['pad_5futils_2eh',['pad_utils.h',['../pad__utils_8h.html',1,'']]],
   ['pad_5fvalue',['pad_value',['../structtvm_1_1relay_1_1SpaceToBatchNDAttrs.html#a7c0fbd47621c925a45e1074f85a6b70f',1,'tvm::relay::SpaceToBatchNDAttrs']]],
diff --git a/docs/reference/api/doxygen/search/all_14.js b/docs/reference/api/doxygen/search/all_14.js
index c96fdc121..6b627b72a 100644
--- a/docs/reference/api/doxygen/search/all_14.js
+++ b/docs/reference/api/doxygen/search/all_14.js
@@ -165,7 +165,7 @@ var searchData=
   ['setvalue_3c_20uint64_5ft_20_3e',['SetValue&lt; uint64_t &gt;',['../namespacetvm_1_1detail.html#acb3382242cbf538f64edae13e4ec5a84',1,'tvm::detail']]],
   ['shallowcopy',['ShallowCopy',['../classtvm_1_1IRModuleNode.html#a86bbdc4b857ce5958a2b5f29e1d6fcb6',1,'tvm::IRModuleNode']]],
   ['shallowcopyirmodule',['ShallowCopyIRModule',['../classtvm_1_1IRModule.html#aea8b821cf92cf525bd87bf15f5d31889',1,'tvm::IRModule']]],
-  ['shape',['shape',['../classtvm_1_1TensorTypeNode.html#a98fa347833e4504dd6f8056d9863a708',1,'tvm::TensorTypeNode::shape()'],['../classtvm_1_1meta__schedule_1_1TensorInfoNode.html#ac16d3b10f7c68eefb27e55e865bb304c',1,'tvm::meta_schedule::TensorInfoNode::shape()'],['../structtvm_1_1relay_1_1InitOpAttrs.html#aaaec76cc5ea9a543c4ea174a6b38bf5e',1,'tvm::relay::InitOpAttrs::shape()'],['../classtvm_1_1relay_1_1ShapePatternNode.html#a749813cbbd38f8021a7df897d527d6e0',1,'tvm::relay::ShapePattern [...]
+  ['shape',['shape',['../classtvm_1_1TensorTypeNode.html#a98fa347833e4504dd6f8056d9863a708',1,'tvm::TensorTypeNode::shape()'],['../classtvm_1_1meta__schedule_1_1TensorInfoNode.html#ac16d3b10f7c68eefb27e55e865bb304c',1,'tvm::meta_schedule::TensorInfoNode::shape()'],['../structtvm_1_1relay_1_1InitOpAttrs.html#aaaec76cc5ea9a543c4ea174a6b38bf5e',1,'tvm::relay::InitOpAttrs::shape()'],['../classtvm_1_1relay_1_1ShapePatternNode.html#a749813cbbd38f8021a7df897d527d6e0',1,'tvm::relay::ShapePattern [...]
   ['shape_5f',['shape_',['../classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html#aa5597a1760c9f8c9d1fd51584b1283fb',1,'tvm::runtime::NDArray::ContainerBase']]],
   ['shape_5fbackward_5frule',['shape_backward_rule',['../classtvm_1_1tir_1_1BijectiveLayoutNode.html#a0befdd0a2371c0d12970e8ac6623b59b',1,'tvm::tir::BijectiveLayoutNode']]],
   ['shape_5fcount',['shape_count',['../structTVMGraphExecutorGraphAttr.html#a182b228582f1186f2a15de50a25b3375',1,'TVMGraphExecutorGraphAttr']]],
diff --git a/docs/reference/api/doxygen/search/functions_10.js b/docs/reference/api/doxygen/search/functions_10.js
index 5aef6a40d..5325cf526 100644
--- a/docs/reference/api/doxygen/search/functions_10.js
+++ b/docs/reference/api/doxygen/search/functions_10.js
@@ -7,7 +7,7 @@ var searchData=
   ['packetdone',['PacketDone',['../classtvm_1_1runtime_1_1micro__rpc_1_1WriteStream.html#a1745b7d9d5a0e094e129eb7a4c363ac9',1,'tvm::runtime::micro_rpc::WriteStream']]],
   ['packimportstoc',['PackImportsToC',['../namespacetvm_1_1codegen.html#abf02059ebadcdb8bbbe5c840b646d67b',1,'tvm::codegen']]],
   ['packimportstollvm',['PackImportsToLLVM',['../namespacetvm_1_1codegen.html#ab2cd2a65bac4b26427a8ca0abe4e0bd6',1,'tvm::codegen']]],
-  ['pad',['Pad',['../namespacetvm_1_1topi.html#a97c798d0a0ec20a95d351618b83d5121',1,'tvm::topi::Pad(const Array&lt; PrimExpr &gt; shape, int odim)'],['../namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5',1,'tvm::topi::pad(const tvm::te::Tensor &amp;t, const tvm::Array&lt; tvm::PrimExpr &gt; &amp;pad_before, tvm::Array&lt; tvm::PrimExpr &gt; pad_after=tvm::Array&lt; tvm::PrimExpr &gt;(), PrimExpr pad_value=PrimExpr(), std::string name=&quot;T_pad&quot;, std::string tag=kElement [...]
+  ['pad',['pad',['../namespacetvm_1_1topi.html#a3305d377f96cd20c23032eeada2756d5',1,'tvm::topi::pad(const tvm::te::Tensor &amp;t, const tvm::Array&lt; tvm::PrimExpr &gt; &amp;pad_before, tvm::Array&lt; tvm::PrimExpr &gt; pad_after=tvm::Array&lt; tvm::PrimExpr &gt;(), PrimExpr pad_value=PrimExpr(), std::string name=&quot;T_pad&quot;, std::string tag=kElementWise, std::string pad_mode=&quot;constant&quot;, const Array&lt; PrimExpr &gt; *dyn_output_shape=nullptr)'],['../namespacetvm_1_1topi [...]
   ['pagememorymanagercreate',['PageMemoryManagerCreate',['../page__allocator_8h.html#a720dbc7474ac13b93fafb974cfc20bc7',1,'page_allocator.h']]],
   ['parallel',['Parallel',['../classtvm_1_1tir_1_1ScheduleNode.html#a553dc17c0b49b175cd16881c81b6c789',1,'tvm::tir::ScheduleNode::Parallel()'],['../classtvm_1_1auto__scheduler_1_1State.html#a2376f0180bc5b5dd4b456f2a75d4a366',1,'tvm::auto_scheduler::State::parallel()'],['../classtvm_1_1te_1_1Stage.html#a60a6be10a1a96cb594c1399efabafef3',1,'tvm::te::Stage::parallel()']]],
   ['parallel_5ffor',['parallel_for',['../namespacetvm_1_1support.html#a8bf1225e8bb1db575578ca2d645fb23c',1,'tvm::support']]],
diff --git a/docs/reference/api/doxygen/search/functions_f.js b/docs/reference/api/doxygen/search/functions_f.js
index 97878f928..799142c7a 100644
--- a/docs/reference/api/doxygen/search/functions_f.js
+++ b/docs/reference/api/doxygen/search/functions_f.js
@@ -9,7 +9,7 @@ var searchData=
   ['op',['op',['../classtvm_1_1OpRegEntry.html#acaeedc636f8a0a85edd1e217a51b83d9',1,'tvm::OpRegEntry::op()'],['../classtvm_1_1Op.html#afde3bc925d4d4c7ea09d4da50fc32c66',1,'tvm::Op::Op()'],['../classtvm_1_1Op.html#abaafec14f5f05cc8bd3cdbf99eeb53d5',1,'tvm::Op::Op(ObjectPtr&lt; Object &gt; n)']]],
   ['operation',['Operation',['../classtvm_1_1te_1_1Operation.html#a7bc69f793cb5cbc99bf20fed8617d487',1,'tvm::te::Operation::Operation()'],['../classtvm_1_1te_1_1Operation.html#a261c64004b4c8712e97f90cb04e135d1',1,'tvm::te::Operation::Operation(ObjectPtr&lt; Object &gt; n)']]],
   ['operator_20_26',['operator &amp;',['../namespacetvm.html#a2a1269a38e7e3621eb2906a47157106a',1,'tvm::operator &amp;(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a6a02ef06e951b1d09acc96c2a4149ae3',1,'tvm::operator &amp;(const PrimExpr &amp;a, int b)'],['../namespacetvm.html#a88ce3d8cef61f4c1ded9c5379b03c352',1,'tvm::operator &amp;(int a, const PrimExpr &amp;b)'],['../namespacetvm_1_1topi.html#a69bc76d169f422bffc6e0ee84afcea87',1,'tvm::topi::operator &amp;(const tvm::te::Tensor &amp [...]
-  ['operator_20_26_26',['operator &amp;&amp;',['../namespacetvm.html#a242b37bc39f3fc56d29e36f916cc1483',1,'tvm::operator &amp;&amp;(const Bool &amp;a, bool b)'],['../namespacetvm.html#a313252634ee340fcb374f25699832b5f',1,'tvm::operator &amp;&amp;(bool a, const Bool &amp;b)'],['../namespacetvm.html#a3d58c54be9c168b77bd3c9b6c3b962d3',1,'tvm::operator &amp;&amp;(const Bool &amp;a, const Bool &amp;b)'],['../namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8',1,'tvm::operator &amp;&amp;(Prim [...]
+  ['operator_20_26_26',['operator &amp;&amp;',['../namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8',1,'tvm::operator &amp;&amp;(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a242b37bc39f3fc56d29e36f916cc1483',1,'tvm::operator &amp;&amp;(const Bool &amp;a, bool b)'],['../namespacetvm.html#a313252634ee340fcb374f25699832b5f',1,'tvm::operator &amp;&amp;(bool a, const Bool &amp;b)'],['../namespacetvm.html#a3d58c54be9c168b77bd3c9b6c3b962d3',1,'tvm::operator &amp;&amp;(const Bool &amp;a, [...]
   ['operator_20array_3c_20integer_20_3e',['operator Array&lt; Integer &gt;',['../classtvm_1_1relay_1_1FeatureSet.html#afc19754f86ffa1ffc63f81cc2fce1959',1,'tvm::relay::FeatureSet']]],
   ['operator_20bool',['operator bool',['../classtvm_1_1Bool.html#a01580c15a0d6db0ae60dfaa39f2aa044',1,'tvm::Bool::operator bool()'],['../classtvm_1_1runtime_1_1Optional.html#a3b2e44b31ccfcb1e8f13fac3aefca792',1,'tvm::runtime::Optional::operator bool()'],['../classtvm_1_1runtime_1_1ObjectPtr.html#a3985a6331cef665602fc3e14b59e7a0c',1,'tvm::runtime::ObjectPtr::operator bool()'],['../classtvm_1_1runtime_1_1TVMPODValue__.html#a10b749a19ee7b11803b94377aee0e8b5',1,'tvm::runtime::TVMPODValue_::o [...]
   ['operator_20datatype',['operator DataType',['../classtvm_1_1runtime_1_1TVMArgValue.html#a0b14124450fc73eb7f6cee8cedf1e32f',1,'tvm::runtime::TVMArgValue::operator DataType()'],['../classtvm_1_1runtime_1_1TVMMovableArgValue__.html#ab68a86a8394d26c9669118312504605c',1,'tvm::runtime::TVMMovableArgValue_::operator DataType()'],['../classtvm_1_1runtime_1_1TVMRetValue.html#a8f597329f3b49b7b89b2d327d1067130',1,'tvm::runtime::TVMRetValue::operator DataType()']]],
@@ -30,34 +30,34 @@ var searchData=
   ['operator_20typedpackedfunc_3c_20ftype_20_3e',['operator TypedPackedFunc&lt; FType &gt;',['../classtvm_1_1runtime_1_1TVMArgValue.html#af345627966162cdc4f9bd366747f0833',1,'tvm::runtime::TVMArgValue::operator TypedPackedFunc&lt; FType &gt;()'],['../classtvm_1_1runtime_1_1TVMMovableArgValue__.html#a6d798256cda612b1d5ac89b4343fbf34',1,'tvm::runtime::TVMMovableArgValue_::operator TypedPackedFunc&lt; FType &gt;()'],['../classtvm_1_1runtime_1_1TVMRetValue.html#ad8b189915c5f3cb57fe174d87072d [...]
   ['operator_20uint64_5ft',['operator uint64_t',['../classtvm_1_1runtime_1_1TVMPODValue__.html#a6eb8d1f7dcce5b2a76322956fdd3fb8c',1,'tvm::runtime::TVMPODValue_']]],
   ['operator_20void_20_2a',['operator void *',['../classtvm_1_1runtime_1_1TVMPODValue__.html#ada67522073183c9a1a9d4afb847afc4c',1,'tvm::runtime::TVMPODValue_']]],
-  ['operator_21',['operator!',['../classtvm_1_1Bool.html#a00a5153c31270c6ec308a516cd46f7fb',1,'tvm::Bool::operator!()'],['../namespacetvm_1_1te.html#a27eae569b04a2d43a513b654b6d2b98c',1,'tvm::te::operator!()'],['../namespacetvm.html#ab354bf1270121abea71fade83f13b0b0',1,'tvm::operator!()']]],
-  ['operator_21_3d',['operator!=',['../classtvm_1_1Integer.html#a49869e0f187d66dcd00568d3e953fc04',1,'tvm::Integer::operator!=(int other) const'],['../classtvm_1_1Integer.html#a1e099442e256b242fe8e2e2a49c1dc33',1,'tvm::Integer::operator!=(Enum other) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#a6e54667aa71ae41d33c22d6bd3973404',1,'tvm::runtime::IterAdapter::operator!=()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a2a049030ff85513916a05924bf519d2c',1,'tvm::runtime::Reve [...]
+  ['operator_21',['operator!',['../classtvm_1_1Bool.html#a00a5153c31270c6ec308a516cd46f7fb',1,'tvm::Bool::operator!()'],['../namespacetvm.html#ab354bf1270121abea71fade83f13b0b0',1,'tvm::operator!()'],['../namespacetvm_1_1te.html#a27eae569b04a2d43a513b654b6d2b98c',1,'tvm::te::operator!()']]],
+  ['operator_21_3d',['operator!=',['../classtvm_1_1Integer.html#a49869e0f187d66dcd00568d3e953fc04',1,'tvm::Integer::operator!=(int other) const'],['../classtvm_1_1Integer.html#a1e099442e256b242fe8e2e2a49c1dc33',1,'tvm::Integer::operator!=(Enum other) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#a6e54667aa71ae41d33c22d6bd3973404',1,'tvm::runtime::IterAdapter::operator!=()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a2a049030ff85513916a05924bf519d2c',1,'tvm::runtime::Reve [...]
   ['operator_25',['operator%',['../namespacetvm.html#ab25738e50b37cd07b2d171ca74ba9321',1,'tvm']]],
   ['operator_26_26',['operator&amp;&amp;',['../namespacetvm_1_1te.html#a45c2369417cfb6d8f20c400bcb1a693c',1,'tvm::te::operator&amp;&amp;(const Tensor::Slice &amp;a, const T &amp;b)'],['../namespacetvm_1_1te.html#a72b7fda8c6d5b169296d3d12522cb40f',1,'tvm::te::operator&amp;&amp;(const T &amp;a, const Tensor::Slice &amp;b)'],['../namespacetvm_1_1te.html#a8f10bf673f27ec755a0f284f3115c2ea',1,'tvm::te::operator&amp;&amp;(const Tensor::Slice &amp;a, const Tensor::Slice &amp;b)']]],
   ['operator_28_29',['operator()',['../classtvm_1_1arith_1_1ConstIntBoundAnalyzer.html#add50f8cc72f7fbf50e456d31f045d24c',1,'tvm::arith::ConstIntBoundAnalyzer::operator()(const PrimExpr &amp;expr) const'],['../classtvm_1_1arith_1_1ConstIntBoundAnalyzer.html#aa24f8d56b331e1b9c6ed315877c4e0a2',1,'tvm::arith::ConstIntBoundAnalyzer::operator()(const PrimExpr &amp;expr, BoundMapType *bound)'],['../classtvm_1_1arith_1_1ModularSetAnalyzer.html#a57c3c62ebc821f5f01bc2b43f06488da',1,'tvm::arith::M [...]
-  ['operator_2a',['operator*',['../classtvm_1_1relay_1_1DFPattern.html#a000e12a821910cb9bfb5c5df0b9f21ae',1,'tvm::relay::DFPattern::operator*()'],['../classtvm_1_1runtime_1_1IterAdapter.html#a4869a61f3e598fcbf8cdb3c14d66f859',1,'tvm::runtime::IterAdapter::operator*()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a0b4fc047041f8e1cb3454ee89120444a',1,'tvm::runtime::ReverseIterAdapter::operator*()'],['../classtvm_1_1runtime_1_1MapNode_1_1iterator.html#ac09b2cd5327e5102ab373b482530f1 [...]
+  ['operator_2a',['operator*',['../classtvm_1_1relay_1_1DFPattern.html#a000e12a821910cb9bfb5c5df0b9f21ae',1,'tvm::relay::DFPattern::operator*()'],['../classtvm_1_1runtime_1_1IterAdapter.html#a4869a61f3e598fcbf8cdb3c14d66f859',1,'tvm::runtime::IterAdapter::operator*()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a0b4fc047041f8e1cb3454ee89120444a',1,'tvm::runtime::ReverseIterAdapter::operator*()'],['../classtvm_1_1runtime_1_1MapNode_1_1iterator.html#ac09b2cd5327e5102ab373b482530f1 [...]
   ['operator_2a_3d',['operator*=',['../namespacetvm.html#a9233d7b436ae52ac070442ba19caed18',1,'tvm']]],
-  ['operator_2b',['operator+',['../classtvm_1_1arith_1_1IntGroupBounds.html#a5de071b5c736097d25fa1a508b06f565',1,'tvm::arith::IntGroupBounds::operator+()'],['../classtvm_1_1arith_1_1IntConstraintsTransform.html#a0a1965c2707e06b47776e574a9597db9',1,'tvm::arith::IntConstraintsTransform::operator+()'],['../classtvm_1_1relay_1_1DFPattern.html#a9074ec635ca5c4ffe0fa7b6f7d7fe394',1,'tvm::relay::DFPattern::operator+()'],['../classtvm_1_1relay_1_1FeatureSet.html#ac2448ecf37a176ad612a9765927ed4fa' [...]
+  ['operator_2b',['operator+',['../classtvm_1_1arith_1_1IntGroupBounds.html#a5de071b5c736097d25fa1a508b06f565',1,'tvm::arith::IntGroupBounds::operator+()'],['../classtvm_1_1arith_1_1IntConstraintsTransform.html#a0a1965c2707e06b47776e574a9597db9',1,'tvm::arith::IntConstraintsTransform::operator+()'],['../classtvm_1_1relay_1_1DFPattern.html#a9074ec635ca5c4ffe0fa7b6f7d7fe394',1,'tvm::relay::DFPattern::operator+()'],['../classtvm_1_1relay_1_1FeatureSet.html#ac2448ecf37a176ad612a9765927ed4fa' [...]
   ['operator_2b_2b',['operator++',['../classtvm_1_1runtime_1_1IterAdapter.html#a40f67dd792619d7d41ac45fc0f52a0c4',1,'tvm::runtime::IterAdapter::operator++()'],['../classtvm_1_1runtime_1_1IterAdapter.html#a76971f210bcd72c05992ae57b2ac8b2e',1,'tvm::runtime::IterAdapter::operator++(int)'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a362bd28f025d28551159e46c415309bd',1,'tvm::runtime::ReverseIterAdapter::operator++()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#acb427db7d19a7 [...]
   ['operator_2b_3d',['operator+=',['../classtvm_1_1relay_1_1FeatureSet.html#a73e4e0de2e58ef67a88d27a48b3c3fc0',1,'tvm::relay::FeatureSet::operator+=()'],['../namespacetvm.html#a96061840d12f84eeecc8fae11e245242',1,'tvm::operator+=()']]],
-  ['operator_2d',['operator-',['../classtvm_1_1relay_1_1DFPattern.html#ad638d57cae9db9a1d4cd8831ab60a18b',1,'tvm::relay::DFPattern::operator-()'],['../classtvm_1_1relay_1_1FeatureSet.html#ad0412921b6fe93a2f51b3c72c03c4095',1,'tvm::relay::FeatureSet::operator-()'],['../classtvm_1_1runtime_1_1IterAdapter.html#ab5671b2b9d56c2abcfb8d3f03cf6fd9e',1,'tvm::runtime::IterAdapter::operator-(difference_type offset) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#a5bbe56d308014b49fec6374315f8d4 [...]
+  ['operator_2d',['operator-',['../classtvm_1_1relay_1_1DFPattern.html#ad638d57cae9db9a1d4cd8831ab60a18b',1,'tvm::relay::DFPattern::operator-()'],['../classtvm_1_1relay_1_1FeatureSet.html#ad0412921b6fe93a2f51b3c72c03c4095',1,'tvm::relay::FeatureSet::operator-()'],['../classtvm_1_1runtime_1_1IterAdapter.html#ab5671b2b9d56c2abcfb8d3f03cf6fd9e',1,'tvm::runtime::IterAdapter::operator-(difference_type offset) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#a5bbe56d308014b49fec6374315f8d4 [...]
   ['operator_2d_2d',['operator--',['../classtvm_1_1runtime_1_1IterAdapter.html#acb643cc554062422a4fd00bbfb3b0a6d',1,'tvm::runtime::IterAdapter::operator--()'],['../classtvm_1_1runtime_1_1IterAdapter.html#ad6a362ac9b5e17443f252f16f1c82703',1,'tvm::runtime::IterAdapter::operator--(int)'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#afa7ed1b09064bb3d6dea7ad886145346',1,'tvm::runtime::ReverseIterAdapter::operator--()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#a53f1d3671d30c [...]
   ['operator_2d_3d',['operator-=',['../classtvm_1_1relay_1_1FeatureSet.html#ad90ce4d929774c0feb92c3eb56bea338',1,'tvm::relay::FeatureSet::operator-=()'],['../namespacetvm.html#a164d454c519fecaa5611c86b65469acc',1,'tvm::operator-=()']]],
   ['operator_2d_3e',['operator-&gt;',['../classtvm_1_1arith_1_1IterMapResult.html#a248e87671f3870f758244125fdbb5ffb',1,'tvm::arith::IterMapResult::operator-&gt;()'],['../classtvm_1_1auto__scheduler_1_1RandomModel.html#a337aa632ca8aedc20266ce0822b32bf9',1,'tvm::auto_scheduler::RandomModel::operator-&gt;()'],['../classtvm_1_1DiagnosticRenderer.html#a21576c617844ae035b09e6e0a6fdd461',1,'tvm::DiagnosticRenderer::operator-&gt;()'],['../classtvm_1_1DiagnosticContext.html#a8f14af7e269aa0647cd9c [...]
   ['operator_2f',['operator/',['../classtvm_1_1relay_1_1DFPattern.html#ad0839b39558941ac374c213f78ca2c81',1,'tvm::relay::DFPattern::operator/()'],['../namespacetvm.html#a18256ba1213ce5ff3cf8037a314354b7',1,'tvm::operator/(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a136427374941fbf8e50f53b1cab39e38',1,'tvm::operator/(const PrimExpr &amp;a, const TB &amp;b)']]],
   ['operator_2f_3d',['operator/=',['../namespacetvm.html#a51dc569142bf8ce8ea55f73029d3807d',1,'tvm']]],
-  ['operator_3c',['operator&lt;',['../classtvm_1_1runtime_1_1ObjectRef.html#a4744bf4a1b48f202d41b51dc5e08e6ee',1,'tvm::runtime::ObjectRef::operator&lt;()'],['../namespacetvm_1_1runtime.html#a2865dffa2fddf5eff9d7ed397563ebd6',1,'tvm::runtime::operator&lt;(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#ad5305faaeefd679da62186dab423bdab',1,'tvm::runtime::operator&lt;(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.htm [...]
-  ['operator_3c_3c',['operator&lt;&lt;',['../classtvm_1_1DiagnosticBuilder.html#aa92a3f9039d464fbefaed90b0e255e84',1,'tvm::DiagnosticBuilder::operator&lt;&lt;()'],['../structtvm_1_1ErrorBuilder.html#ad40b754d2d8992b65d0bc5b116bd3f71',1,'tvm::ErrorBuilder::operator&lt;&lt;()'],['../namespacetvm_1_1runtime.html#af22b89284299c81d0c1802199af446d7',1,'tvm::runtime::operator&lt;&lt;(std::ostream &amp;os, const ObjectRef &amp;n)'],['../namespacetvm_1_1runtime.html#a2c20920d4a09a6c022768b353ec8d [...]
-  ['operator_3c_3d',['operator&lt;=',['../namespacetvm_1_1runtime.html#a92428efae022d4982b2644f8960d4386',1,'tvm::runtime::operator&lt;=(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a8daf39dc422f228fae2ec11a426bab28',1,'tvm::runtime::operator&lt;=(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#a9cf2e7e67fd12d69c5bce2be881c8296',1,'tvm::runtime::operator&lt;=(const String &amp;lhs, const String &amp;rhs)'],[ [...]
+  ['operator_3c',['operator&lt;',['../classtvm_1_1runtime_1_1ObjectRef.html#a4744bf4a1b48f202d41b51dc5e08e6ee',1,'tvm::runtime::ObjectRef::operator&lt;()'],['../namespacetvm.html#a1f98476c3a413f6cdfc7b7e490f3221b',1,'tvm::operator&lt;()'],['../namespacetvm_1_1runtime.html#a2865dffa2fddf5eff9d7ed397563ebd6',1,'tvm::runtime::operator&lt;(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#ad5305faaeefd679da62186dab423bdab',1,'tvm::runtime::operator&lt;(co [...]
+  ['operator_3c_3c',['operator&lt;&lt;',['../classtvm_1_1DiagnosticBuilder.html#aa92a3f9039d464fbefaed90b0e255e84',1,'tvm::DiagnosticBuilder::operator&lt;&lt;()'],['../structtvm_1_1ErrorBuilder.html#ad40b754d2d8992b65d0bc5b116bd3f71',1,'tvm::ErrorBuilder::operator&lt;&lt;()'],['../namespacetvm.html#af682776c3609284f1bc3ea436e21a67a',1,'tvm::operator&lt;&lt;()'],['../namespacetvm_1_1runtime.html#af22b89284299c81d0c1802199af446d7',1,'tvm::runtime::operator&lt;&lt;(std::ostream &amp;os, con [...]
+  ['operator_3c_3d',['operator&lt;=',['../namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d',1,'tvm::operator&lt;=()'],['../namespacetvm_1_1runtime.html#a92428efae022d4982b2644f8960d4386',1,'tvm::runtime::operator&lt;=(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a8daf39dc422f228fae2ec11a426bab28',1,'tvm::runtime::operator&lt;=(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#a9cf2e7e67fd12d69c5bce2be881c82 [...]
   ['operator_3d',['operator=',['../classtvm_1_1arith_1_1Analyzer.html#a9dccc7d98b8b9465390e10436d3a9178',1,'tvm::arith::Analyzer::operator=()'],['../classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html#aab332907b9f98876f441f6403b801187',1,'tvm::TypedEnvFunc&lt; R(Args...)&gt;::operator=()'],['../classtvm_1_1Integer.html#ad538a2ae6f636b3ce38fb4162b1c2549',1,'tvm::Integer::operator=()'],['../classtvm_1_1relay_1_1PatternWildcard.html#aef430b36a4be504969829f800e126245',1,'tvm::relay::Patter [...]
-  ['operator_3d_3d',['operator==',['../classtvm_1_1Integer.html#a5b9ad6d47c7c6df5a066d58f6ba65f8e',1,'tvm::Integer::operator==(int other) const'],['../classtvm_1_1Integer.html#ad2291d037ff36f5371f6381478b3eeaf',1,'tvm::Integer::operator==(Enum other) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#ac4066242008f1e60533099615a20fdbd',1,'tvm::runtime::IterAdapter::operator==()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#ae71116873ea643de9f8ec1836cc116db',1,'tvm::runtime::Reve [...]
-  ['operator_3e',['operator&gt;',['../namespacetvm_1_1runtime.html#af7996be0c57184e8ff9e655d0d055edc',1,'tvm::runtime::operator&gt;(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a031e6c8e64cd9db11754355e3250ab4c',1,'tvm::runtime::operator&gt;(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#abf23f2e6b19c87ecf3f27a7506ddf4c0',1,'tvm::runtime::operator&gt;(const String &amp;lhs, const String &amp;rhs)'],['../nam [...]
-  ['operator_3e_3d',['operator&gt;=',['../namespacetvm_1_1runtime.html#a944029d40e689e4d2acab53ce36a5c99',1,'tvm::runtime::operator&gt;=(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a8f8694e651078c6df7c5c26898ee9f14',1,'tvm::runtime::operator&gt;=(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#af7310fb8b9944f41f8f30021d92847eb',1,'tvm::runtime::operator&gt;=(const String &amp;lhs, const String &amp;rhs)'],[ [...]
-  ['operator_3e_3e',['operator&gt;&gt;',['../namespacetvm_1_1te.html#a4a8524467a57ae005654a3f0cb816e3f',1,'tvm::te::operator&gt;&gt;(const Tensor::Slice &amp;a, const T &amp;b)'],['../namespacetvm_1_1te.html#a8705a88b943011532ff4c94c4b06c213',1,'tvm::te::operator&gt;&gt;(const T &amp;a, const Tensor::Slice &amp;b)'],['../namespacetvm_1_1te.html#a9d3d9a057d5f1a36277ac4005f38bafa',1,'tvm::te::operator&gt;&gt;(const Tensor::Slice &amp;a, const Tensor::Slice &amp;b)'],['../namespacetvm.html# [...]
+  ['operator_3d_3d',['operator==',['../classtvm_1_1Integer.html#a5b9ad6d47c7c6df5a066d58f6ba65f8e',1,'tvm::Integer::operator==(int other) const'],['../classtvm_1_1Integer.html#ad2291d037ff36f5371f6381478b3eeaf',1,'tvm::Integer::operator==(Enum other) const'],['../classtvm_1_1runtime_1_1IterAdapter.html#ac4066242008f1e60533099615a20fdbd',1,'tvm::runtime::IterAdapter::operator==()'],['../classtvm_1_1runtime_1_1ReverseIterAdapter.html#ae71116873ea643de9f8ec1836cc116db',1,'tvm::runtime::Reve [...]
+  ['operator_3e',['operator&gt;',['../namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041',1,'tvm::operator&gt;()'],['../namespacetvm_1_1runtime.html#af7996be0c57184e8ff9e655d0d055edc',1,'tvm::runtime::operator&gt;(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a031e6c8e64cd9db11754355e3250ab4c',1,'tvm::runtime::operator&gt;(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#abf23f2e6b19c87ecf3f27a7506ddf4c0',1,' [...]
+  ['operator_3e_3d',['operator&gt;=',['../namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf',1,'tvm::operator&gt;=()'],['../namespacetvm_1_1runtime.html#a944029d40e689e4d2acab53ce36a5c99',1,'tvm::runtime::operator&gt;=(const String &amp;lhs, const std::string &amp;rhs)'],['../namespacetvm_1_1runtime.html#a8f8694e651078c6df7c5c26898ee9f14',1,'tvm::runtime::operator&gt;=(const std::string &amp;lhs, const String &amp;rhs)'],['../namespacetvm_1_1runtime.html#af7310fb8b9944f41f8f30021d92847 [...]
+  ['operator_3e_3e',['operator&gt;&gt;',['../namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41',1,'tvm::operator&gt;&gt;()'],['../namespacetvm_1_1te.html#a4a8524467a57ae005654a3f0cb816e3f',1,'tvm::te::operator&gt;&gt;(const Tensor::Slice &amp;a, const T &amp;b)'],['../namespacetvm_1_1te.html#a8705a88b943011532ff4c94c4b06c213',1,'tvm::te::operator&gt;&gt;(const T &amp;a, const Tensor::Slice &amp;b)'],['../namespacetvm_1_1te.html#a9d3d9a057d5f1a36277ac4005f38bafa',1,'tvm::te::operator&gt [...]
   ['operator_5b_5d',['operator[]',['../classtvm_1_1AttrRegistryMapContainerMap.html#a713c3d1884423e3e67e3fdaef2566925',1,'tvm::AttrRegistryMapContainerMap::operator[]()'],['../classtvm_1_1AttrRegistryMap.html#ae4152d6da01b645eeff90fbaeac7c14f',1,'tvm::AttrRegistryMap::operator[]()'],['../classtvm_1_1runtime_1_1ADT.html#a88cf389012aad0e129c84c056ae546fc',1,'tvm::runtime::ADT::operator[]()'],['../classtvm_1_1runtime_1_1Array.html#ac84683f61368eb10e26e72a0097cccd7',1,'tvm::runtime::Array::o [...]
   ['operator_5e',['operator^',['../namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f',1,'tvm::operator^(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a82dc2fe21e7a64be5a1b11c2a8775d31',1,'tvm::operator^(const PrimExpr &amp;a, int b)'],['../namespacetvm.html#a6f638564e5e4d1023096523800f2579e',1,'tvm::operator^(int a, const PrimExpr &amp;b)'],['../namespacetvm_1_1topi.html#abef6b4a192138a38c651c2c347e60263',1,'tvm::topi::operator^(const tvm::te::Tensor &amp;A, const tvm::te::Tensor &a [...]
   ['operator_7c',['operator|',['../namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d',1,'tvm::operator|(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a7c7fc3c45e6f6b52b2a1064deabd0797',1,'tvm::operator|(const PrimExpr &amp;a, int b)'],['../namespacetvm.html#ad5ba71021b167b0a6ca2138b2c8bbace',1,'tvm::operator|(int a, const PrimExpr &amp;b)'],['../namespacetvm_1_1topi.html#a0e3d0c113031f4b209febd097e426e06',1,'tvm::topi::operator|(const tvm::te::Tensor &amp;A, const tvm::te::Tensor &a [...]
-  ['operator_7c_7c',['operator||',['../classtvm_1_1relay_1_1DFPattern.html#ab8caf2bf80291b17922190cab174e11d',1,'tvm::relay::DFPattern::operator||()'],['../namespacetvm.html#a002710a4652156a57495e10a09b5d002',1,'tvm::operator||(const Bool &amp;a, bool b)'],['../namespacetvm.html#a4c8c1c1c248859ce0d20f614e18a9524',1,'tvm::operator||(bool a, const Bool &amp;b)'],['../namespacetvm.html#ae4ef6ceffc5778d734c2ddfc72020d60',1,'tvm::operator||(const Bool &amp;a, const Bool &amp;b)'],['../namespa [...]
+  ['operator_7c_7c',['operator||',['../classtvm_1_1relay_1_1DFPattern.html#ab8caf2bf80291b17922190cab174e11d',1,'tvm::relay::DFPattern::operator||()'],['../namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6',1,'tvm::operator||(PrimExpr a, PrimExpr b)'],['../namespacetvm.html#a002710a4652156a57495e10a09b5d002',1,'tvm::operator||(const Bool &amp;a, bool b)'],['../namespacetvm.html#a4c8c1c1c248859ce0d20f614e18a9524',1,'tvm::operator||(bool a, const Bool &amp;b)'],['../namespacetvm.html#ae4 [...]
   ['operator_7e',['operator~',['../namespacetvm.html#a354b9954ff25dd819a51d856fdd38827',1,'tvm']]],
   ['opstarthook',['OpStartHook',['../classtvm_1_1runtime_1_1vm_1_1VirtualMachine.html#a4e3d0f62ef25486968335336d1445bfa',1,'tvm::runtime::vm::VirtualMachine']]],
   ['opstophook',['OpStopHook',['../classtvm_1_1runtime_1_1vm_1_1VirtualMachine.html#a4f9142d370e9629f86b792c4328c24a4',1,'tvm::runtime::vm::VirtualMachine']]],
diff --git a/docs/reference/api/doxygen/strided__slice_8h_source.html b/docs/reference/api/doxygen/strided__slice_8h_source.html
index 7d0010d1a..0a5acc98f 100644
--- a/docs/reference/api/doxygen/strided__slice_8h_source.html
+++ b/docs/reference/api/doxygen/strided__slice_8h_source.html
@@ -69,12 +69,12 @@ $(function() {
 <a href="strided__slice_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or m [...]
 <div class="ttc" id="namespacetvm_html_ab2a3c98ef29937defd6accb9b171a940"><div class="ttname"><a href="namespacetvm.html#ab2a3c98ef29937defd6accb9b171a940">tvm::abs</a></div><div class="ttdeci">PrimExpr abs(PrimExpr x, Span span=Span())</div><div class="ttdoc">Calculate absolute value of x. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_aff75b2e533b654909ca1bd9485ef4e6b"><div class="ttname"><a href="namespacetvm_1_1topi.html#aff75b2e533b654909ca1bd9485ef4e6b">tvm::topi::StridedSliceOutputShape</a></div><div class="ttdeci">Array&lt; PrimExpr &gt; StridedSliceOutputShape(const Array&lt; PrimExpr &gt; &amp;ishape, const Array&lt; Integer &gt; &amp;begin, const Array&lt; Integer &gt; &amp;end, const Array&lt; Integer &gt; &amp;strides, const Array&lt; Integer &gt; &amp;axes, co [...]
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="namespacetvm_1_1te_html"><div class="ttname"><a href="namespacetvm_1_1te.html">tvm::te</a></div><div class="ttdoc">Tensor expression language DSL. </div><div class="ttdef"><b>Definition:</b> extracted_task.h:33</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Var_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Var.html">tvm::tir::Var</a></div><div class="ttdoc">a named variable in TIR </div><div class="ttdef"><b>Definition:</b> var.h:88</div></div>
 <div class="ttc" id="namespacetvm_html_a353217978feabae3575560bf1586885f"><div class="ttname"><a href="namespacetvm.html#a353217978feabae3575560bf1586885f">tvm::if_then_else</a></div><div class="ttdeci">PrimExpr if_then_else(PrimExpr cond, PrimExpr true_value, PrimExpr false_value, Span span=Span())</div><div class="ttdoc">Conditional expression. </div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:275</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:489</div></div>
 <div class="ttc" id="constant__utils_8h_html"><div class="ttname"><a href="constant__utils_8h.html">constant_utils.h</a></div><div class="ttdoc">Utility functions for handling constants in TVM expressions. </div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Any_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Any.html">tvm::tir::Any</a></div><div class="ttdoc">Managed reference to AnyNode. </div><div class="ttdef"><b>Definition:</b> expr.h:1157</div></div>
 <div class="ttc" id="tir_2expr_8h_html"><div class="ttname"><a href="tir_2expr_8h.html">expr.h</a></div><div class="ttdoc">TIR expressions. </div></div>
diff --git a/docs/reference/api/doxygen/tir_2analysis_8h_source.html b/docs/reference/api/doxygen/tir_2analysis_8h_source.html
index f8f6e2638..07142e2e4 100644
--- a/docs/reference/api/doxygen/tir_2analysis_8h_source.html
+++ b/docs/reference/api/doxygen/tir_2analysis_8h_source.html
@@ -105,7 +105,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_html_aaa95d3ad68932ab206efbe0a326db6a2"><div class="ttname"><a href="namespacetvm_1_1topi.html#aaa95d3ad68932ab206efbe0a326db6a2">tvm::topi::mod</a></div><div class="ttdeci">tvm::PrimExpr mod(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:290</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a2d76fa1fb628ff276a284e61123589c5"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a2d76fa1fb628ff276a284e61123589c5">tvm::runtime::ObjectRef::as</a></div><div class="ttdeci">const ObjectType * as() const</div><div class="ttdoc">Try to downcast the internal Object to a raw pointer of a corresponding type. </div><div class="ttdef"><b>Definition:</b> object.h:865</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_html_a1674ab4f532cf0bd0a96a12165105d82"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1674ab4f532cf0bd0a96a12165105d82">tvm::tir::EstimateTIRFlops</a></div><div class="ttdeci">double EstimateTIRFlops(const Stmt &amp;stmt)</div><div class="ttdoc">Estimate the FLOPs of a TIR fragment. </div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/tir_2expr_8h_source.html b/docs/reference/api/doxygen/tir_2expr_8h_source.html
index de0f495c1..f8e74865f 100644
--- a/docs/reference/api/doxygen/tir_2expr_8h_source.html
+++ b/docs/reference/api/doxygen/tir_2expr_8h_source.html
@@ -112,7 +112,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1AnyNode_html_a741b57467c114e83182910df14fca3be"><div class="ttname"><a href="classtvm_1_1tir_1_1AnyNode.html#a741b57467c114e83182910df14fca3be">tvm::tir::AnyNode::ToSizeVar</a></div><div class="ttdeci">SizeVar ToSizeVar() const</div><div class="ttdoc">Convert to SizeVar. </div><div class="ttdef"><b>Definition:</b> expr.h:1147</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Var_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Var.html">tvm::tir::Var</a></div><div class="ttdoc">a named variable in TIR </div><div class="ttdef"><b>Definition:</b> var.h:88</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1StringImmNode_html_ac01ffc8ae6ef913d1fbc34fb7f23847c"><div class="ttname"><a href="classtvm_1_1tir_1_1StringImmNode.html#ac01ffc8ae6ef913d1fbc34fb7f23847c">tvm::tir::StringImmNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:58</div></div>
-<div class="ttc" id="classtvm_1_1FloatImmNode_html"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html">tvm::FloatImmNode</a></div><div class="ttdoc">Constant floating point literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:321</div></div>
+<div class="ttc" id="classtvm_1_1FloatImmNode_html"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html">tvm::FloatImmNode</a></div><div class="ttdoc">Constant floating point literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:535</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1IterVar_html"><div class="ttname"><a href="classtvm_1_1tir_1_1IterVar.html">tvm::tir::IterVar</a></div><div class="ttdoc">Iteration Variable, represents an iteration over an integer interval. </div><div class="ttdef"><b>Definition:</b> var.h:301</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1SelectNode_html_a35e913f542bd1231b92adfc2d58938ef"><div class="ttname"><a href="classtvm_1_1tir_1_1SelectNode.html#a35e913f542bd1231b92adfc2d58938ef">tvm::tir::SelectNode::true_value</a></div><div class="ttdeci">PrimExpr true_value</div><div class="ttdoc">value to be returned when condition is true. </div><div class="ttdef"><b>Definition:</b> expr.h:557</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Min_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Min.html">tvm::tir::Min</a></div><div class="ttdoc">Managed reference to MinNode. </div><div class="ttdef"><b>Definition:</b> expr.h:282</div></div>
@@ -142,7 +142,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1AddNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1AddNode.html">tvm::tir::AddNode</a></div><div class="ttdoc">a + b </div><div class="ttdef"><b>Definition:</b> expr.h:155</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1LE_html"><div class="ttname"><a href="classtvm_1_1tir_1_1LE.html">tvm::tir::LE</a></div><div class="ttdoc">Managed reference to LENode. </div><div class="ttdef"><b>Definition:</b> expr.h:394</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1OrNode_html_a0515fcd2eb360181e478fec3da259d34"><div class="ttname"><a href="classtvm_1_1tir_1_1OrNode.html#a0515fcd2eb360181e478fec3da259d34">tvm::tir::OrNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const OrNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:486</div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:275</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:489</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1RampNode_html_aed87b929f940da5ded10afaa7a3a810c"><div class="ttname"><a href="classtvm_1_1tir_1_1RampNode.html#aed87b929f940da5ded10afaa7a3a810c">tvm::tir::RampNode::base</a></div><div class="ttdeci">PrimExpr base</div><div class="ttdoc">The base value. </div><div class="ttdef"><b>Definition:</b> expr.h:782</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1LoadNode_html_a0133c1a78c31b6eba24036a1f410cc1b"><div class="ttname"><a href="classtvm_1_1tir_1_1LoadNode.html#a0133c1a78c31b6eba24036a1f410cc1b">tvm::tir::LoadNode::index</a></div><div class="ttdeci">PrimExpr index</div><div class="ttdoc">The index locations to be loaded. </div><div class="ttdef"><b>Definition:</b> expr.h:731</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ReduceNode_html_a9afa77e5fe462dd6d75740fb2b504dfa"><div class="ttname"><a href="classtvm_1_1tir_1_1ReduceNode.html#a9afa77e5fe462dd6d75740fb2b504dfa">tvm::tir::ReduceNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> expr.h:1084</div></div>
@@ -195,7 +195,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1CastNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1CastNode.html">tvm::tir::CastNode</a></div><div class="ttdoc">Cast value from one data type to another. </div><div class="ttdef"><b>Definition:</b> expr.h:88</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1CommReducerNode_html_aa2a1b2580c68193d8e122d1a525a3092"><div class="ttname"><a href="classtvm_1_1tir_1_1CommReducerNode.html#aa2a1b2580c68193d8e122d1a525a3092">tvm::tir::CommReducerNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> expr.h:1040</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
-<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:217</div></div>
+<div class="ttc" id="classtvm_1_1RelayExpr_html"><div class="ttname"><a href="classtvm_1_1RelayExpr.html">tvm::RelayExpr</a></div><div class="ttdoc">Managed reference to RelayExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:431</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ShuffleNode_html_af92dbc6a0550db6a5eadaff1ca620bee"><div class="ttname"><a href="classtvm_1_1tir_1_1ShuffleNode.html#af92dbc6a0550db6a5eadaff1ca620bee">tvm::tir::ShuffleNode::SEqualReduce</a></div><div class="ttdeci">bool SEqualReduce(const ShuffleNode *other, SEqualReducer equal) const</div><div class="ttdef"><b>Definition:</b> expr.h:972</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1StringImmNode_html_a99c1ae09343d2a751523871480eba905"><div class="ttname"><a href="classtvm_1_1tir_1_1StringImmNode.html#a99c1ae09343d2a751523871480eba905">tvm::tir::StringImmNode::TVM_DECLARE_FINAL_OBJECT_INFO</a></div><div class="ttdeci">TVM_DECLARE_FINAL_OBJECT_INFO(StringImmNode, PrimExprNode)</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Max_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Max.html">tvm::tir::Max</a></div><div class="ttdoc">Managed reference to MaxNode. </div><div class="ttdef"><b>Definition:</b> expr.h:298</div></div>
diff --git a/docs/reference/api/doxygen/tir_2expr__functor_8h_source.html b/docs/reference/api/doxygen/tir_2expr__functor_8h_source.html
index 045ff5ef7..38d318074 100644
--- a/docs/reference/api/doxygen/tir_2expr__functor_8h_source.html
+++ b/docs/reference/api/doxygen/tir_2expr__functor_8h_source.html
@@ -80,7 +80,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4_html_aaf87a9c9650901e8e88250821c76725c"><div class="ttname"><a href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#aaf87a9c9650901e8e88250821c76725c">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;::VisitExpr_</a></div><div class="ttdeci">virtual R VisitExpr_(const FloorModNode *op, Args... args)</div><div class="ttdef"><b>Defin [...]
 <div class="ttc" id="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4_html_a55c70ed3207e5c48d5b8c0eae989cc41"><div class="ttname"><a href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#a55c70ed3207e5c48d5b8c0eae989cc41">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;::VisitExpr_</a></div><div class="ttdeci">virtual R VisitExpr_(const ReduceNode *op, Args... args)</div><div class="ttdef"><b>Definit [...]
 <div class="ttc" id="classtvm_1_1tir_1_1StringImmNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1StringImmNode.html">tvm::tir::StringImmNode</a></div><div class="ttdoc">String constants, only used in asserts. </div><div class="ttdef"><b>Definition:</b> expr.h:53</div></div>
-<div class="ttc" id="classtvm_1_1FloatImmNode_html"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html">tvm::FloatImmNode</a></div><div class="ttdoc">Constant floating point literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:321</div></div>
+<div class="ttc" id="classtvm_1_1FloatImmNode_html"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html">tvm::FloatImmNode</a></div><div class="ttdoc">Constant floating point literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:535</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4_html_a26ef2dea0abc6f927e4fedc56ef0b3e9"><div class="ttname"><a href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#a26ef2dea0abc6f927e4fedc56ef0b3e9">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;::VisitExpr_</a></div><div class="ttdeci">virtual R VisitExpr_(const LetNode *op, Args... args)</div><div class="ttdef"><b>Definition [...]
 <div class="ttc" id="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4_html_a463951999eb3aa125880d5b6aa1f9191"><div class="ttname"><a href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#a463951999eb3aa125880d5b6aa1f9191">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;::~ExprFunctor</a></div><div class="ttdeci">virtual ~ExprFunctor()</div><div class="ttdoc">virtual destructor </div><div class="ttdef [...]
 <div class="ttc" id="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4_html_acb40e66c38e946d830a0418520661586"><div class="ttname"><a href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#acb40e66c38e946d830a0418520661586">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;::VisitExpr_</a></div><div class="ttdeci">virtual R VisitExpr_(const FloatImmNode *op, Args... args)</div><div class="ttdef"><b>Defin [...]
@@ -93,7 +93,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4_html_a9f18d0dac340380dfd22737e3aee6aee"><div class="ttname"><a href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#a9f18d0dac340380dfd22737e3aee6aee">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;::VisitExpr_</a></div><div class="ttdeci">virtual R VisitExpr_(const LENode *op, Args... args)</div><div class="ttdef"><b>Definition: [...]
 <div class="ttc" id="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4_html_a75f11907cca61167b90117b89e3b8818"><div class="ttname"><a href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#a75f11907cca61167b90117b89e3b8818">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;::VisitExpr_</a></div><div class="ttdeci">virtual R VisitExpr_(const AddNode *op, Args... args)</div><div class="ttdef"><b>Definition [...]
 <div class="ttc" id="classtvm_1_1tir_1_1AddNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1AddNode.html">tvm::tir::AddNode</a></div><div class="ttdoc">a + b </div><div class="ttdef"><b>Definition:</b> expr.h:155</div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:275</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:489</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4_html_a7a6233828f233b9d3d415dd3ac75518b"><div class="ttname"><a href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#a7a6233828f233b9d3d415dd3ac75518b">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;::VisitExpr_</a></div><div class="ttdeci">virtual R VisitExpr_(const SubNode *op, Args... args)</div><div class="ttdef"><b>Definition [...]
 <div class="ttc" id="classtvm_1_1tir_1_1OrNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1OrNode.html">tvm::tir::OrNode</a></div><div class="ttdoc">a || b </div><div class="ttdef"><b>Definition:</b> expr.h:472</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4_html_a009fe51d7a185bce0c6e72f1c860ac30"><div class="ttname"><a href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html#a009fe51d7a185bce0c6e72f1c860ac30">tvm::tir::ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;::VisitExpr_</a></div><div class="ttdeci">virtual R VisitExpr_(const CastNode *op, Args... args)</div><div class="ttdef"><b>Definitio [...]
diff --git a/docs/reference/api/doxygen/tir_2function_8h_source.html b/docs/reference/api/doxygen/tir_2function_8h_source.html
index cb4b3e1ba..e389fdc5d 100644
--- a/docs/reference/api/doxygen/tir_2function_8h_source.html
+++ b/docs/reference/api/doxygen/tir_2function_8h_source.html
@@ -94,7 +94,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1BaseExprNode_html_ae3a0760a9f8b1379bc86f13e8bb3a22e"><div class="ttname"><a href="classtvm_1_1BaseExprNode.html#ae3a0760a9f8b1379bc86f13e8bb3a22e">tvm::BaseExprNode::span</a></div><div class="ttdeci">Span span</div><div class="ttdoc">Span that points to the original source code. Reserved debug information. </div><div class="ttdef"><b>Definition:</b> expr.h:55</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1PrimFuncNode_html_a4ea77411935e78d975414f31e153fc78"><div class="ttname"><a href="classtvm_1_1tir_1_1PrimFuncNode.html#a4ea77411935e78d975414f31e153fc78">tvm::tir::PrimFuncNode::params</a></div><div class="ttdeci">Array&lt; tir::Var &gt; params</div><div class="ttdoc">Function parameters. </div><div class="ttdef"><b>Definition:</b> function.h:49</div></div>
 <div class="ttc" id="tir_2expr_8h_html"><div class="ttname"><a href="tir_2expr_8h.html">expr.h</a></div><div class="ttdoc">TIR expressions. </div></div>
-<div class="ttc" id="classtvm_1_1RelayExprNode_html_ae30ca49a8b84288fbc21d5f312f02929"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ae30ca49a8b84288fbc21d5f312f02929">tvm::RelayExprNode::checked_type_</a></div><div class="ttdeci">Type checked_type_</div><div class="ttdoc">Stores the result of type inference(type checking). </div><div class="ttdef"><b>Definition:</b> expr.h:153</div></div>
+<div class="ttc" id="classtvm_1_1RelayExprNode_html_ae30ca49a8b84288fbc21d5f312f02929"><div class="ttname"><a href="classtvm_1_1RelayExprNode.html#ae30ca49a8b84288fbc21d5f312f02929">tvm::RelayExprNode::checked_type_</a></div><div class="ttdeci">Type checked_type_</div><div class="ttdoc">Stores the result of type inference(type checking). </div><div class="ttdef"><b>Definition:</b> expr.h:367</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1PrimFunc_html"><div class="ttname"><a href="classtvm_1_1tir_1_1PrimFunc.html">tvm::tir::PrimFunc</a></div><div class="ttdoc">Managed reference to PrimFuncNode. </div><div class="ttdef"><b>Definition:</b> function.h:156</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1PrimFuncNode_html_a547273727ddd02acbf44ff46b3a420c2"><div class="ttname"><a href="classtvm_1_1tir_1_1PrimFuncNode.html#a547273727ddd02acbf44ff46b3a420c2">tvm::tir::PrimFuncNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> function.h:131</div></div>
diff --git a/docs/reference/api/doxygen/tir_2op_8h.html b/docs/reference/api/doxygen/tir_2op_8h.html
index 59ae68b26..9be4b5872 100644
--- a/docs/reference/api/doxygen/tir_2op_8h.html
+++ b/docs/reference/api/doxygen/tir_2op_8h.html
@@ -154,96 +154,48 @@ Functions</h2></td></tr>
 <tr class="memitem:ae2794f261657780b2af4208b95d9cfcb"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ae2794f261657780b2af4208b95d9cfcb">tvm::add</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:ae2794f261657780b2af4208b95d9cfcb"><td class="mdescLeft">&#160;</td><td class="mdescRight">add operator  <a href="namespacetvm.html#ae2794f261657780b2af4208b95d9cfcb">More...</a><br /></td></tr>
 <tr class="separator:ae2794f261657780b2af4208b95d9cfcb"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:af246f441d4ac21b110185b77240b2dcc"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#af246f441d4ac21b110185b77240b2dcc">tvm::operator+</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:af246f441d4ac21b110185b77240b2dcc"><td class="mdescLeft">&#160;</td><td class="mdescRight">add operator  <a href="namespacetvm.html#af246f441d4ac21b110185b77240b2dcc">More...</a><br /></td></tr>
-<tr class="separator:af246f441d4ac21b110185b77240b2dcc"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a89da021f5e3e2e911acfd96f973e5bc3"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a89da021f5e3e2e911acfd96f973e5bc3">tvm::sub</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:a89da021f5e3e2e911acfd96f973e5bc3"><td class="mdescLeft">&#160;</td><td class="mdescRight">subtraction operator  <a href="namespacetvm.html#a89da021f5e3e2e911acfd96f973e5bc3">More...</a><br /></td></tr>
 <tr class="separator:a89da021f5e3e2e911acfd96f973e5bc3"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:abde487c0197942c4ebb1b47277b89dac"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abde487c0197942c4ebb1b47277b89dac">tvm::operator-</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:abde487c0197942c4ebb1b47277b89dac"><td class="mdescLeft">&#160;</td><td class="mdescRight">subtraction operator  <a href="namespacetvm.html#abde487c0197942c4ebb1b47277b89dac">More...</a><br /></td></tr>
-<tr class="separator:abde487c0197942c4ebb1b47277b89dac"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a5cd85b156fb31f75f91c8a5c012f8a66"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a5cd85b156fb31f75f91c8a5c012f8a66">tvm::neg</a> (PrimExpr a, Span span=Span())</td></tr>
 <tr class="memdesc:a5cd85b156fb31f75f91c8a5c012f8a66"><td class="mdescLeft">&#160;</td><td class="mdescRight">negation.  <a href="namespacetvm.html#a5cd85b156fb31f75f91c8a5c012f8a66">More...</a><br /></td></tr>
 <tr class="separator:a5cd85b156fb31f75f91c8a5c012f8a66"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:abc417454badf61b154d6a8d87cd8f171"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abc417454badf61b154d6a8d87cd8f171">tvm::operator-</a> (PrimExpr a)</td></tr>
-<tr class="memdesc:abc417454badf61b154d6a8d87cd8f171"><td class="mdescLeft">&#160;</td><td class="mdescRight">negation.  <a href="namespacetvm.html#abc417454badf61b154d6a8d87cd8f171">More...</a><br /></td></tr>
-<tr class="separator:abc417454badf61b154d6a8d87cd8f171"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:aaa28e92b677086d89ebfb77204bf92a2"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#aaa28e92b677086d89ebfb77204bf92a2">tvm::mul</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:aaa28e92b677086d89ebfb77204bf92a2"><td class="mdescLeft">&#160;</td><td class="mdescRight">multiplication operator  <a href="namespacetvm.html#aaa28e92b677086d89ebfb77204bf92a2">More...</a><br /></td></tr>
 <tr class="separator:aaa28e92b677086d89ebfb77204bf92a2"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a5c5034de2993b9130b7bd9d593a11bb5"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a5c5034de2993b9130b7bd9d593a11bb5">tvm::operator*</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a5c5034de2993b9130b7bd9d593a11bb5"><td class="mdescLeft">&#160;</td><td class="mdescRight">multiplication operator  <a href="namespacetvm.html#a5c5034de2993b9130b7bd9d593a11bb5">More...</a><br /></td></tr>
-<tr class="separator:a5c5034de2993b9130b7bd9d593a11bb5"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a18256ba1213ce5ff3cf8037a314354b7"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a18256ba1213ce5ff3cf8037a314354b7">tvm::operator/</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a18256ba1213ce5ff3cf8037a314354b7"><td class="mdescLeft">&#160;</td><td class="mdescRight">division operator  <a href="namespacetvm.html#a18256ba1213ce5ff3cf8037a314354b7">More...</a><br /></td></tr>
-<tr class="separator:a18256ba1213ce5ff3cf8037a314354b7"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ad4fceb4266c6e7644fa373eacf73359f"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ad4fceb4266c6e7644fa373eacf73359f">tvm::left_shift</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:ad4fceb4266c6e7644fa373eacf73359f"><td class="mdescLeft">&#160;</td><td class="mdescRight">left shift operator  <a href="namespacetvm.html#ad4fceb4266c6e7644fa373eacf73359f">More...</a><br /></td></tr>
 <tr class="separator:ad4fceb4266c6e7644fa373eacf73359f"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:af682776c3609284f1bc3ea436e21a67a"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#af682776c3609284f1bc3ea436e21a67a">tvm::operator&lt;&lt;</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:af682776c3609284f1bc3ea436e21a67a"><td class="mdescLeft">&#160;</td><td class="mdescRight">left shift operator  <a href="namespacetvm.html#af682776c3609284f1bc3ea436e21a67a">More...</a><br /></td></tr>
-<tr class="separator:af682776c3609284f1bc3ea436e21a67a"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ae8ecc0382685a855187bede0c97d93e6"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ae8ecc0382685a855187bede0c97d93e6">tvm::right_shift</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:ae8ecc0382685a855187bede0c97d93e6"><td class="mdescLeft">&#160;</td><td class="mdescRight">right shift operator  <a href="namespacetvm.html#ae8ecc0382685a855187bede0c97d93e6">More...</a><br /></td></tr>
 <tr class="separator:ae8ecc0382685a855187bede0c97d93e6"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41">tvm::operator&gt;&gt;</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="mdescLeft">&#160;</td><td class="mdescRight">right shift operator  <a href="namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41">More...</a><br /></td></tr>
-<tr class="separator:a1ce1eb32fc9d76ebe5a6b8d185024d41"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a7ffc1cdb3a52b680e4b509395c9a252d"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a7ffc1cdb3a52b680e4b509395c9a252d">tvm::greater</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:a7ffc1cdb3a52b680e4b509395c9a252d"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater  <a href="namespacetvm.html#a7ffc1cdb3a52b680e4b509395c9a252d">More...</a><br /></td></tr>
 <tr class="separator:a7ffc1cdb3a52b680e4b509395c9a252d"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ad93d00f7b080dc3f905f5c34c170a041"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041">tvm::operator&gt;</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:ad93d00f7b080dc3f905f5c34c170a041"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater  <a href="namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041">More...</a><br /></td></tr>
-<tr class="separator:ad93d00f7b080dc3f905f5c34c170a041"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ab1b704bb5a31b602869fb5c94a56f468"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ab1b704bb5a31b602869fb5c94a56f468">tvm::greater_equal</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:ab1b704bb5a31b602869fb5c94a56f468"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater_equal  <a href="namespacetvm.html#ab1b704bb5a31b602869fb5c94a56f468">More...</a><br /></td></tr>
 <tr class="separator:ab1b704bb5a31b602869fb5c94a56f468"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a5530417da455bd46f5dc55f27d69bcdf"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf">tvm::operator&gt;=</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a5530417da455bd46f5dc55f27d69bcdf"><td class="mdescLeft">&#160;</td><td class="mdescRight">greater_equal  <a href="namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf">More...</a><br /></td></tr>
-<tr class="separator:a5530417da455bd46f5dc55f27d69bcdf"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a52fa1dc57423a077eb098960162e7b85"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a52fa1dc57423a077eb098960162e7b85">tvm::less</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:a52fa1dc57423a077eb098960162e7b85"><td class="mdescLeft">&#160;</td><td class="mdescRight">less  <a href="namespacetvm.html#a52fa1dc57423a077eb098960162e7b85">More...</a><br /></td></tr>
 <tr class="separator:a52fa1dc57423a077eb098960162e7b85"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a1f98476c3a413f6cdfc7b7e490f3221b">tvm::operator&lt;</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="mdescLeft">&#160;</td><td class="mdescRight">less  <a href="namespacetvm.html#a1f98476c3a413f6cdfc7b7e490f3221b">More...</a><br /></td></tr>
-<tr class="separator:a1f98476c3a413f6cdfc7b7e490f3221b"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a6dfe80d16a7b4f551c87a8901d366d08"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a6dfe80d16a7b4f551c87a8901d366d08">tvm::less_equal</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:a6dfe80d16a7b4f551c87a8901d366d08"><td class="mdescLeft">&#160;</td><td class="mdescRight">less_equal  <a href="namespacetvm.html#a6dfe80d16a7b4f551c87a8901d366d08">More...</a><br /></td></tr>
 <tr class="separator:a6dfe80d16a7b4f551c87a8901d366d08"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a598f8139c469abc4066dbdd0a0a0845d"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d">tvm::operator&lt;=</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a598f8139c469abc4066dbdd0a0a0845d"><td class="mdescLeft">&#160;</td><td class="mdescRight">less_equal  <a href="namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d">More...</a><br /></td></tr>
-<tr class="separator:a598f8139c469abc4066dbdd0a0a0845d"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a1c4f14382b85bcfa57d9a3460db2354a"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a1c4f14382b85bcfa57d9a3460db2354a">tvm::equal</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:a1c4f14382b85bcfa57d9a3460db2354a"><td class="mdescLeft">&#160;</td><td class="mdescRight">equal  <a href="namespacetvm.html#a1c4f14382b85bcfa57d9a3460db2354a">More...</a><br /></td></tr>
 <tr class="separator:a1c4f14382b85bcfa57d9a3460db2354a"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a2ea3b45c96d3980227e418f7158ce5c3"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a2ea3b45c96d3980227e418f7158ce5c3">tvm::operator==</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a2ea3b45c96d3980227e418f7158ce5c3"><td class="mdescLeft">&#160;</td><td class="mdescRight">equal  <a href="namespacetvm.html#a2ea3b45c96d3980227e418f7158ce5c3">More...</a><br /></td></tr>
-<tr class="separator:a2ea3b45c96d3980227e418f7158ce5c3"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:ac3932d85fd31819eae6a80841296af51"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ac3932d85fd31819eae6a80841296af51">tvm::not_equal</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:ac3932d85fd31819eae6a80841296af51"><td class="mdescLeft">&#160;</td><td class="mdescRight">not_equal  <a href="namespacetvm.html#ac3932d85fd31819eae6a80841296af51">More...</a><br /></td></tr>
 <tr class="separator:ac3932d85fd31819eae6a80841296af51"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a03983cf66713724c138f9697bb8e0e97"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a03983cf66713724c138f9697bb8e0e97">tvm::operator!=</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a03983cf66713724c138f9697bb8e0e97"><td class="mdescLeft">&#160;</td><td class="mdescRight">not_equal  <a href="namespacetvm.html#a03983cf66713724c138f9697bb8e0e97">More...</a><br /></td></tr>
-<tr class="separator:a03983cf66713724c138f9697bb8e0e97"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a27d5567b95675d383c4675fdcd85346c"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a27d5567b95675d383c4675fdcd85346c">tvm::logical_and</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:a27d5567b95675d383c4675fdcd85346c"><td class="mdescLeft">&#160;</td><td class="mdescRight">and  <a href="namespacetvm.html#a27d5567b95675d383c4675fdcd85346c">More...</a><br /></td></tr>
 <tr class="separator:a27d5567b95675d383c4675fdcd85346c"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a7579d33e0aac9600dec46264a3f1edb8"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8">tvm::operator &amp;&amp;</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a7579d33e0aac9600dec46264a3f1edb8"><td class="mdescLeft">&#160;</td><td class="mdescRight">and  <a href="namespacetvm.html#a7579d33e0aac9600dec46264a3f1edb8">More...</a><br /></td></tr>
-<tr class="separator:a7579d33e0aac9600dec46264a3f1edb8"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a4509dece1af96338cc25097855fcecd7"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a4509dece1af96338cc25097855fcecd7">tvm::logical_or</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:a4509dece1af96338cc25097855fcecd7"><td class="mdescLeft">&#160;</td><td class="mdescRight">or  <a href="namespacetvm.html#a4509dece1af96338cc25097855fcecd7">More...</a><br /></td></tr>
 <tr class="separator:a4509dece1af96338cc25097855fcecd7"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6">tvm::operator||</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="mdescLeft">&#160;</td><td class="mdescRight">or  <a href="namespacetvm.html#ac3bf2ef3556c995846dddcd84e5db8a6">More...</a><br /></td></tr>
-<tr class="separator:ac3bf2ef3556c995846dddcd84e5db8a6"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a62955df1df48917116efe39d4cd18fec"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a62955df1df48917116efe39d4cd18fec">tvm::logical_not</a> (PrimExpr a, Span span=Span())</td></tr>
 <tr class="memdesc:a62955df1df48917116efe39d4cd18fec"><td class="mdescLeft">&#160;</td><td class="mdescRight">not  <a href="namespacetvm.html#a62955df1df48917116efe39d4cd18fec">More...</a><br /></td></tr>
 <tr class="separator:a62955df1df48917116efe39d4cd18fec"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:ab354bf1270121abea71fade83f13b0b0"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#ab354bf1270121abea71fade83f13b0b0">tvm::operator!</a> (PrimExpr a)</td></tr>
-<tr class="memdesc:ab354bf1270121abea71fade83f13b0b0"><td class="mdescLeft">&#160;</td><td class="mdescRight">not  <a href="namespacetvm.html#ab354bf1270121abea71fade83f13b0b0">More...</a><br /></td></tr>
-<tr class="separator:ab354bf1270121abea71fade83f13b0b0"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a16f9cd9219b505e2cc05c5a7558ac61f"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a16f9cd9219b505e2cc05c5a7558ac61f">tvm::div</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:a16f9cd9219b505e2cc05c5a7558ac61f"><td class="mdescLeft">&#160;</td><td class="mdescRight">compute division in C semantics.  <a href="namespacetvm.html#a16f9cd9219b505e2cc05c5a7558ac61f">More...</a><br /></td></tr>
 <tr class="separator:a16f9cd9219b505e2cc05c5a7558ac61f"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -280,27 +232,15 @@ Functions</h2></td></tr>
 <tr class="memitem:acebb0c446b76d5a28c3b1b55f827c86e"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#acebb0c446b76d5a28c3b1b55f827c86e">tvm::bitwise_and</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:acebb0c446b76d5a28c3b1b55f827c86e"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise and of two values  <a href="namespacetvm.html#acebb0c446b76d5a28c3b1b55f827c86e">More...</a><br /></td></tr>
 <tr class="separator:acebb0c446b76d5a28c3b1b55f827c86e"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a2a1269a38e7e3621eb2906a47157106a"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a2a1269a38e7e3621eb2906a47157106a">tvm::operator &amp;</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a2a1269a38e7e3621eb2906a47157106a"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise and of two values  <a href="namespacetvm.html#a2a1269a38e7e3621eb2906a47157106a">More...</a><br /></td></tr>
-<tr class="separator:a2a1269a38e7e3621eb2906a47157106a"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:aee8d9c7084d8df28bf6f05e0851a557f"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#aee8d9c7084d8df28bf6f05e0851a557f">tvm::bitwise_or</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:aee8d9c7084d8df28bf6f05e0851a557f"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise or of two values  <a href="namespacetvm.html#aee8d9c7084d8df28bf6f05e0851a557f">More...</a><br /></td></tr>
 <tr class="separator:aee8d9c7084d8df28bf6f05e0851a557f"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a236d9aae385e6697874f75e4c8a69f8d"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d">tvm::operator|</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:a236d9aae385e6697874f75e4c8a69f8d"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise or of two values  <a href="namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d">More...</a><br /></td></tr>
-<tr class="separator:a236d9aae385e6697874f75e4c8a69f8d"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a6c238cafec94d03b8e70688d4cf82642"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a6c238cafec94d03b8e70688d4cf82642">tvm::bitwise_xor</a> (PrimExpr a, PrimExpr b, Span span=Span())</td></tr>
 <tr class="memdesc:a6c238cafec94d03b8e70688d4cf82642"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise xor of two values  <a href="namespacetvm.html#a6c238cafec94d03b8e70688d4cf82642">More...</a><br /></td></tr>
 <tr class="separator:a6c238cafec94d03b8e70688d4cf82642"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f">tvm::operator^</a> (PrimExpr a, PrimExpr b)</td></tr>
-<tr class="memdesc:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise xor of two values  <a href="namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f">More...</a><br /></td></tr>
-<tr class="separator:abd7d1b3232218b25e2e0cf6ef699a65f"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a3f6d8fba545c2944efc83b57e6190459"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a3f6d8fba545c2944efc83b57e6190459">tvm::bitwise_neg</a> (PrimExpr a, Span span=Span())</td></tr>
 <tr class="memdesc:a3f6d8fba545c2944efc83b57e6190459"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise negation of two values  <a href="namespacetvm.html#a3f6d8fba545c2944efc83b57e6190459">More...</a><br /></td></tr>
 <tr class="separator:a3f6d8fba545c2944efc83b57e6190459"><td class="memSeparator" colspan="2">&#160;</td></tr>
-<tr class="memitem:a354b9954ff25dd819a51d856fdd38827"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a354b9954ff25dd819a51d856fdd38827">tvm::operator~</a> (PrimExpr a)</td></tr>
-<tr class="memdesc:a354b9954ff25dd819a51d856fdd38827"><td class="mdescLeft">&#160;</td><td class="mdescRight">take bitwise negation of two values  <a href="namespacetvm.html#a354b9954ff25dd819a51d856fdd38827">More...</a><br /></td></tr>
-<tr class="separator:a354b9954ff25dd819a51d856fdd38827"><td class="memSeparator" colspan="2">&#160;</td></tr>
 <tr class="memitem:a353217978feabae3575560bf1586885f"><td class="memItemLeft" align="right" valign="top">PrimExpr&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="namespacetvm.html#a353217978feabae3575560bf1586885f">tvm::if_then_else</a> (PrimExpr cond, PrimExpr true_value, PrimExpr false_value, Span span=Span())</td></tr>
 <tr class="memdesc:a353217978feabae3575560bf1586885f"><td class="mdescLeft">&#160;</td><td class="mdescRight">Conditional expression.  <a href="namespacetvm.html#a353217978feabae3575560bf1586885f">More...</a><br /></td></tr>
 <tr class="separator:a353217978feabae3575560bf1586885f"><td class="memSeparator" colspan="2">&#160;</td></tr>
@@ -827,7 +767,7 @@ Functions</h2></td></tr>
         </tr>
       </table>
 </div><div class="memdoc">
-<b>Value:</b><div class="fragment"><div class="line"><span class="keyword">inline</span> PrimExpr Name(<span class="keyword">const</span> PrimExpr&amp; a, <span class="keywordtype">float</span> b) { <span class="keywordflow">return</span> Name(a, PrimExpr(b)); } \</div><div class="line">  inline PrimExpr Name(<span class="keywordtype">float</span> a, <span class="keyword">const</span> PrimExpr&amp; b) { <span class="keywordflow">return</span> Name(PrimExpr(a), b); } \</div><div class="li [...]
+<b>Value:</b><div class="fragment"><div class="line"><span class="keyword">inline</span> PrimExpr Name(<span class="keyword">const</span> PrimExpr&amp; a, <span class="keywordtype">float</span> b) { <span class="keywordflow">return</span> Name(a, PrimExpr(b)); } \</div><div class="line">  inline PrimExpr Name(<span class="keywordtype">float</span> a, <span class="keyword">const</span> PrimExpr&amp; b) { <span class="keywordflow">return</span> Name(PrimExpr(a), b); } \</div><div class="li [...]
 </div><!-- fragment -->
 </div>
 </div>
@@ -847,7 +787,7 @@ Functions</h2></td></tr>
       </table>
 </div><div class="memdoc">
 <b>Value:</b><div class="fragment"><div class="line"><span class="keyword">inline</span> PrimExpr Name(<span class="keyword">const</span> PrimExpr&amp; a, <span class="keywordtype">float</span> b, <a class="code" href="namespacetvm_1_1relay.html#af40ca6124bc2e88f2323eeb79d326cc0">Span</a> span = <a class="code" href="namespacetvm_1_1relay.html#af40ca6124bc2e88f2323eeb79d326cc0">Span</a>()) {  \</div><div class="line">    return Name(a, PrimExpr(b), span);                                  [...]
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 </div><!-- fragment -->
 </div>
 </div>
@@ -866,7 +806,7 @@ Functions</h2></td></tr>
         </tr>
       </table>
 </div><div class="memdoc">
-<b>Value:</b><div class="fragment"><div class="line"><span class="keyword">inline</span> PrimExpr Name(<span class="keyword">const</span> PrimExpr&amp; a, <span class="keywordtype">int</span> b) { \</div><div class="line">    return Name(a, <a class="code" href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tir::make_const</a>(a.dtype(), b)); \</div><div class="line">  }                                                \</div><div class="line">  inline PrimExpr Name(<span cla [...]
+<b>Value:</b><div class="fragment"><div class="line"><span class="keyword">inline</span> PrimExpr Name(<span class="keyword">const</span> PrimExpr&amp; a, <span class="keywordtype">int</span> b) { \</div><div class="line">    return Name(a, <a class="code" href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tir::make_const</a>(a.dtype(), b)); \</div><div class="line">  }                                                \</div><div class="line">  inline PrimExpr Name(<span cla [...]
 </div><!-- fragment -->
 </div>
 </div>
@@ -886,7 +826,7 @@ Functions</h2></td></tr>
       </table>
 </div><div class="memdoc">
 <b>Value:</b><div class="fragment"><div class="line"><span class="keyword">inline</span> PrimExpr Name(<span class="keyword">const</span> PrimExpr&amp; a, <span class="keywordtype">int</span> b, <a class="code" href="namespacetvm_1_1relay.html#af40ca6124bc2e88f2323eeb79d326cc0">Span</a> span = <a class="code" href="namespacetvm_1_1relay.html#af40ca6124bc2e88f2323eeb79d326cc0">Span</a>()) { \</div><div class="line">    return Name(a, <a class="code" href="namespacetvm_1_1tir.html#a1a07120 [...]
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 </div><!-- fragment -->
 </div>
 </div>
diff --git a/docs/reference/api/doxygen/tir_2op_8h_source.html b/docs/reference/api/doxygen/tir_2op_8h_source.html
index 1e28a9838..de367c53a 100644
--- a/docs/reference/api/doxygen/tir_2op_8h_source.html
+++ b/docs/reference/api/doxygen/tir_2op_8h_source.html
@@ -66,82 +66,75 @@ $(function() {
 <div class="title">op.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="tir_2op_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more con [...]
-<div class="ttc" id="namespacetvm_html_a03983cf66713724c138f9697bb8e0e97"><div class="ttname"><a href="namespacetvm.html#a03983cf66713724c138f9697bb8e0e97">tvm::operator!=</a></div><div class="ttdeci">PrimExpr operator!=(PrimExpr a, PrimExpr b)</div><div class="ttdoc">not_equal </div></div>
+<a href="tir_2op_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more con [...]
 <div class="ttc" id="namespacetvm_1_1relay_html_af40ca6124bc2e88f2323eeb79d326cc0"><div class="ttname"><a href="namespacetvm_1_1relay.html#af40ca6124bc2e88f2323eeb79d326cc0">tvm::relay::Span</a></div><div class="ttdeci">tvm::Span Span</div><div class="ttdef"><b>Definition:</b> base.h:65</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_html_a246623a4a0c9cd8f8a209ec952a8d1c3"><div class="ttname"><a href="namespacetvm_1_1tir.html#a246623a4a0c9cd8f8a209ec952a8d1c3">tvm::tir::is_const_power_of_two_integer</a></div><div class="ttdeci">bool is_const_power_of_two_integer(const PrimExpr &amp;x, int *shift)</div><div class="ttdoc">Check whether x is a constant power of two If x is power of two, write the power to the shift...</div></div>
-<div class="ttc" id="namespacetvm_html_a1f98476c3a413f6cdfc7b7e490f3221b"><div class="ttname"><a href="namespacetvm.html#a1f98476c3a413f6cdfc7b7e490f3221b">tvm::operator&lt;</a></div><div class="ttdeci">PrimExpr operator&lt;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">less </div></div>
 <div class="ttc" id="namespacetvm_html_a336b811d7f339f888ad38d2e2657710d"><div class="ttname"><a href="namespacetvm.html#a336b811d7f339f888ad38d2e2657710d">tvm::likely</a></div><div class="ttdeci">PrimExpr likely(PrimExpr cond, Span span=Span())</div><div class="ttdoc">Mark condition as likely. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_a4f3c849cf79c0812a50fdbb9ad175648"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#a4f3c849cf79c0812a50fdbb9ad175648">tvm::runtime::DataType::is_int</a></div><div class="ttdeci">bool is_int() const</div><div class="ttdef"><b>Definition:</b> data_type.h:99</div></div>
 <div class="ttc" id="namespacetvm_html_a6c238cafec94d03b8e70688d4cf82642"><div class="ttname"><a href="namespacetvm.html#a6c238cafec94d03b8e70688d4cf82642">tvm::bitwise_xor</a></div><div class="ttdeci">PrimExpr bitwise_xor(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">take bitwise xor of two values </div></div>
-<div class="ttc" id="namespacetvm_html_a16a5aa0300233b6c5fbcc61c424eee30"><div class="ttname"><a href="namespacetvm.html#a16a5aa0300233b6c5fbcc61c424eee30">tvm::log10</a></div><div class="ttdeci">PrimExpr log10(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:898</div></div>
-<div class="ttc" id="namespacetvm_html_a242b37bc39f3fc56d29e36f916cc1483"><div class="ttname"><a href="namespacetvm.html#a242b37bc39f3fc56d29e36f916cc1483">tvm::operator &amp;&amp;</a></div><div class="ttdeci">Bool operator &amp;&amp;(const Bool &amp;a, bool b)</div><div class="ttdef"><b>Definition:</b> expr.h:384</div></div>
+<div class="ttc" id="namespacetvm_html_a16a5aa0300233b6c5fbcc61c424eee30"><div class="ttname"><a href="namespacetvm.html#a16a5aa0300233b6c5fbcc61c424eee30">tvm::log10</a></div><div class="ttdeci">PrimExpr log10(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:703</div></div>
 <div class="ttc" id="namespacetvm_html_aac2abc149c1a47944c37b560181b15c0"><div class="ttname"><a href="namespacetvm.html#aac2abc149c1a47944c37b560181b15c0">tvm::min</a></div><div class="ttdeci">PrimExpr min(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">take minimum of two values </div></div>
 <div class="ttc" id="namespacetvm_html_a5cd85b156fb31f75f91c8a5c012f8a66"><div class="ttname"><a href="namespacetvm.html#a5cd85b156fb31f75f91c8a5c012f8a66">tvm::neg</a></div><div class="ttdeci">PrimExpr neg(PrimExpr a, Span span=Span())</div><div class="ttdoc">negation. </div></div>
 <div class="ttc" id="namespacetvm_html_ab1b704bb5a31b602869fb5c94a56f468"><div class="ttname"><a href="namespacetvm.html#ab1b704bb5a31b602869fb5c94a56f468">tvm::greater_equal</a></div><div class="ttdeci">PrimExpr greater_equal(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">greater_equal </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a9b5104dcf0933da31329bb0b2580a947"><div class="ttname"><a href="namespacetvm_1_1tir.html#a9b5104dcf0933da31329bb0b2580a947">tvm::tir::is_one</a></div><div class="ttdeci">bool is_one(const PrimExpr &amp;x)</div><div class="ttdoc">Check whether x is a constant integer 1. </div><div class="ttdef"><b>Definition:</b> op.h:1015</div></div>
-<div class="ttc" id="namespacetvm_html_acde00e06bb7d8ccd78f1dd33b966e178"><div class="ttname"><a href="namespacetvm.html#acde00e06bb7d8ccd78f1dd33b966e178">tvm::popcount</a></div><div class="ttdeci">PrimExpr popcount(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:899</div></div>
-<div class="ttc" id="namespacetvm_html_a475b388b9a19d05dca849707d74636a7"><div class="ttname"><a href="namespacetvm.html#a475b388b9a19d05dca849707d74636a7">tvm::atan</a></div><div class="ttdeci">PrimExpr atan(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:907</div></div>
-<div class="ttc" id="namespacetvm_html_a002710a4652156a57495e10a09b5d002"><div class="ttname"><a href="namespacetvm.html#a002710a4652156a57495e10a09b5d002">tvm::operator||</a></div><div class="ttdeci">Bool operator||(const Bool &amp;a, bool b)</div><div class="ttdef"><b>Definition:</b> expr.h:379</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a9b5104dcf0933da31329bb0b2580a947"><div class="ttname"><a href="namespacetvm_1_1tir.html#a9b5104dcf0933da31329bb0b2580a947">tvm::tir::is_one</a></div><div class="ttdeci">bool is_one(const PrimExpr &amp;x)</div><div class="ttdoc">Check whether x is a constant integer 1. </div><div class="ttdef"><b>Definition:</b> op.h:820</div></div>
+<div class="ttc" id="namespacetvm_html_acde00e06bb7d8ccd78f1dd33b966e178"><div class="ttname"><a href="namespacetvm.html#acde00e06bb7d8ccd78f1dd33b966e178">tvm::popcount</a></div><div class="ttdeci">PrimExpr popcount(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:704</div></div>
+<div class="ttc" id="namespacetvm_html_a475b388b9a19d05dca849707d74636a7"><div class="ttname"><a href="namespacetvm.html#a475b388b9a19d05dca849707d74636a7">tvm::atan</a></div><div class="ttdeci">PrimExpr atan(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:712</div></div>
 <div class="ttc" id="namespacetvm_html_ab2a3c98ef29937defd6accb9b171a940"><div class="ttname"><a href="namespacetvm.html#ab2a3c98ef29937defd6accb9b171a940">tvm::abs</a></div><div class="ttdeci">PrimExpr abs(PrimExpr x, Span span=Span())</div><div class="ttdoc">Calculate absolute value of x. </div></div>
 <div class="ttc" id="namespacetvm_html_a0d36aeede9c8e2fa2973807edf2b9f4b"><div class="ttname"><a href="namespacetvm.html#a0d36aeede9c8e2fa2973807edf2b9f4b">tvm::ceildiv</a></div><div class="ttdeci">PrimExpr ceildiv(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute ceil(a / b) </div></div>
 <div class="ttc" id="namespacetvm_html_aaff65dde3044433b2220677aedf4855f"><div class="ttname"><a href="namespacetvm.html#aaff65dde3044433b2220677aedf4855f">tvm::floor</a></div><div class="ttdeci">PrimExpr floor(PrimExpr x, Span span=Span())</div><div class="ttdoc">Calculate floor(x) </div></div>
-<div class="ttc" id="namespacetvm_html_a65b68a0c2cea6c1bbd338585fcdf9fdd"><div class="ttname"><a href="namespacetvm.html#a65b68a0c2cea6c1bbd338585fcdf9fdd">tvm::exp10</a></div><div class="ttdeci">PrimExpr exp10(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:890</div></div>
+<div class="ttc" id="namespacetvm_html_a65b68a0c2cea6c1bbd338585fcdf9fdd"><div class="ttname"><a href="namespacetvm.html#a65b68a0c2cea6c1bbd338585fcdf9fdd">tvm::exp10</a></div><div class="ttdeci">PrimExpr exp10(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:695</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_a3c9ce1627be2550f656cd37b6c698c7da92916e0b7138b26e222f4b85618bd5b4"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#a3c9ce1627be2550f656cd37b6c698c7da92916e0b7138b26e222f4b85618bd5b4">tvm::runtime::DataType::kCustomBegin</a></div><div class="ttdef"><b>Definition:</b> data_type.h:57</div></div>
 <div class="ttc" id="namespacetvm_html_a3b37fa55ea93d6868751a2441996b072"><div class="ttname"><a href="namespacetvm.html#a3b37fa55ea93d6868751a2441996b072">tvm::min_value</a></div><div class="ttdeci">PrimExpr min_value(const DataType &amp;dtype, Span span=Span())</div></div>
 <div class="ttc" id="namespacetvm_html_ada5ad8338d3144221d8f16380e6c4855"><div class="ttname"><a href="namespacetvm.html#ada5ad8338d3144221d8f16380e6c4855">tvm::indexmod</a></div><div class="ttdeci">PrimExpr indexmod(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute the remainder floor(a / b) where a and b are non-negative. </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 <div class="ttc" id="ir_2expr_8h_html"><div class="ttname"><a href="ir_2expr_8h.html">expr.h</a></div><div class="ttdoc">Base expr nodes in TVM. </div></div>
-<div class="ttc" id="namespacetvm_html_ad828bc801c73df761c58d9f8877d52ee"><div class="ttname"><a href="namespacetvm.html#ad828bc801c73df761c58d9f8877d52ee">tvm::sinh</a></div><div class="ttdeci">PrimExpr sinh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:904</div></div>
+<div class="ttc" id="namespacetvm_html_ad828bc801c73df761c58d9f8877d52ee"><div class="ttname"><a href="namespacetvm.html#ad828bc801c73df761c58d9f8877d52ee">tvm::sinh</a></div><div class="ttdeci">PrimExpr sinh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:709</div></div>
 <div class="ttc" id="namespacetvm_html_ae2794f261657780b2af4208b95d9cfcb"><div class="ttname"><a href="namespacetvm.html#ae2794f261657780b2af4208b95d9cfcb">tvm::add</a></div><div class="ttdeci">PrimExpr add(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">add operator </div></div>
-<div class="ttc" id="tir_2op_8h_html_abc43baea1e8f1c876bfa743a063a5928"><div class="ttname"><a href="tir_2op_8h.html#abc43baea1e8f1c876bfa743a063a5928">TVM_DECLARE_INTRIN_BINARY</a></div><div class="ttdeci">#define TVM_DECLARE_INTRIN_BINARY(OpName)</div><div class="ttdef"><b>Definition:</b> op.h:913</div></div>
+<div class="ttc" id="tir_2op_8h_html_abc43baea1e8f1c876bfa743a063a5928"><div class="ttname"><a href="tir_2op_8h.html#abc43baea1e8f1c876bfa743a063a5928">TVM_DECLARE_INTRIN_BINARY</a></div><div class="ttdeci">#define TVM_DECLARE_INTRIN_BINARY(OpName)</div><div class="ttdef"><b>Definition:</b> op.h:718</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
-<div class="ttc" id="tir_2op_8h_html_a34c733b88658efba336e09ccd85e576c"><div class="ttname"><a href="tir_2op_8h.html#a34c733b88658efba336e09ccd85e576c">TVM_DEFINE_ASSIGN_OP_OVERLOAD</a></div><div class="ttdeci">#define TVM_DEFINE_ASSIGN_OP_OVERLOAD(Name, OpFunc)</div><div class="ttdef"><b>Definition:</b> op.h:1157</div></div>
-<div class="ttc" id="namespacetvm_html_af99838098788d40c80b402f29b3c2e8c"><div class="ttname"><a href="namespacetvm.html#af99838098788d40c80b402f29b3c2e8c">tvm::tan</a></div><div class="ttdeci">PrimExpr tan(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:900</div></div>
+<div class="ttc" id="tir_2op_8h_html_a34c733b88658efba336e09ccd85e576c"><div class="ttname"><a href="tir_2op_8h.html#a34c733b88658efba336e09ccd85e576c">TVM_DEFINE_ASSIGN_OP_OVERLOAD</a></div><div class="ttdeci">#define TVM_DEFINE_ASSIGN_OP_OVERLOAD(Name, OpFunc)</div><div class="ttdef"><b>Definition:</b> op.h:962</div></div>
+<div class="ttc" id="namespacetvm_html_af99838098788d40c80b402f29b3c2e8c"><div class="ttname"><a href="namespacetvm.html#af99838098788d40c80b402f29b3c2e8c">tvm::tan</a></div><div class="ttdeci">PrimExpr tan(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:705</div></div>
 <div class="ttc" id="namespacetvm_html_a89da021f5e3e2e911acfd96f973e5bc3"><div class="ttname"><a href="namespacetvm.html#a89da021f5e3e2e911acfd96f973e5bc3">tvm::sub</a></div><div class="ttdeci">PrimExpr sub(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">subtraction operator </div></div>
-<div class="ttc" id="namespacetvm_html_ab72a6b6a2d0c2aa3f6a95f60dc831493"><div class="ttname"><a href="namespacetvm.html#ab72a6b6a2d0c2aa3f6a95f60dc831493">tvm::atanh</a></div><div class="ttdeci">PrimExpr atanh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:910</div></div>
+<div class="ttc" id="namespacetvm_html_ab72a6b6a2d0c2aa3f6a95f60dc831493"><div class="ttname"><a href="namespacetvm.html#ab72a6b6a2d0c2aa3f6a95f60dc831493">tvm::atanh</a></div><div class="ttdeci">PrimExpr atanh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:715</div></div>
 <div class="ttc" id="namespacetvm_html_ae7826d26d74304ff31ad2cbf278b772c"><div class="ttname"><a href="namespacetvm.html#ae7826d26d74304ff31ad2cbf278b772c">tvm::nearbyint</a></div><div class="ttdeci">PrimExpr nearbyint(PrimExpr x, Span span=Span())</div><div class="ttdoc">Calculates std::nearbyint(x) </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_afe559720046f6e10d188f5df80d4a0fc"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#afe559720046f6e10d188f5df80d4a0fc">tvm::runtime::DataType::is_float</a></div><div class="ttdeci">bool is_float() const</div><div class="ttdef"><b>Definition:</b> data_type.h:93</div></div>
-<div class="ttc" id="namespacetvm_html_a9eabd3011b72041605ac7475094c87b1"><div class="ttname"><a href="namespacetvm.html#a9eabd3011b72041605ac7475094c87b1">tvm::asin</a></div><div class="ttdeci">PrimExpr asin(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:905</div></div>
+<div class="ttc" id="namespacetvm_html_a9eabd3011b72041605ac7475094c87b1"><div class="ttname"><a href="namespacetvm.html#a9eabd3011b72041605ac7475094c87b1">tvm::asin</a></div><div class="ttdeci">PrimExpr asin(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:710</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1SeqStmtNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1SeqStmtNode.html">tvm::tir::SeqStmtNode</a></div><div class="ttdoc">The container of seq statement. Represent a sequence of statements. </div><div class="ttdef"><b>Definition:</b> stmt.h:688</div></div>
 <div class="ttc" id="namespacetvm_html_a1c4f14382b85bcfa57d9a3460db2354a"><div class="ttname"><a href="namespacetvm.html#a1c4f14382b85bcfa57d9a3460db2354a">tvm::equal</a></div><div class="ttdeci">PrimExpr equal(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">equal </div></div>
 <div class="ttc" id="namespacetvm_html_a5e4738caa6bcd0259af64b25e25dfd93"><div class="ttname"><a href="namespacetvm.html#a5e4738caa6bcd0259af64b25e25dfd93">tvm::ceil</a></div><div class="ttdeci">PrimExpr ceil(PrimExpr x, Span span=Span())</div><div class="ttdoc">Calculate ceil(x) </div></div>
-<div class="ttc" id="namespacetvm_html_ac5347541411e75f59758a29596565f63"><div class="ttname"><a href="namespacetvm.html#ac5347541411e75f59758a29596565f63">tvm::ldexp</a></div><div class="ttdeci">PrimExpr ldexp(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:923</div></div>
+<div class="ttc" id="namespacetvm_html_ac5347541411e75f59758a29596565f63"><div class="ttname"><a href="namespacetvm.html#ac5347541411e75f59758a29596565f63">tvm::ldexp</a></div><div class="ttdeci">PrimExpr ldexp(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:728</div></div>
 <div class="ttc" id="namespacetvm_html_a353217978feabae3575560bf1586885f"><div class="ttname"><a href="namespacetvm.html#a353217978feabae3575560bf1586885f">tvm::if_then_else</a></div><div class="ttdeci">PrimExpr if_then_else(PrimExpr cond, PrimExpr true_value, PrimExpr false_value, Span span=Span())</div><div class="ttdoc">Conditional expression. </div></div>
-<div class="ttc" id="classtvm_1_1FloatImmNode_html"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html">tvm::FloatImmNode</a></div><div class="ttdoc">Constant floating point literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:321</div></div>
+<div class="ttc" id="classtvm_1_1FloatImmNode_html"><div class="ttname"><a href="classtvm_1_1FloatImmNode.html">tvm::FloatImmNode</a></div><div class="ttdoc">Constant floating point literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:535</div></div>
 <div class="ttc" id="namespacetvm_html_a4509dece1af96338cc25097855fcecd7"><div class="ttname"><a href="namespacetvm.html#a4509dece1af96338cc25097855fcecd7">tvm::logical_or</a></div><div class="ttdeci">PrimExpr logical_or(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">or </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_a71feb294f412836c3d7e012133a3f339"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#a71feb294f412836c3d7e012133a3f339">tvm::runtime::DataType::code</a></div><div class="ttdeci">int code() const</div><div class="ttdef"><b>Definition:</b> data_type.h:81</div></div>
 <div class="ttc" id="namespacetvm_html_aee8d9c7084d8df28bf6f05e0851a557f"><div class="ttname"><a href="namespacetvm.html#aee8d9c7084d8df28bf6f05e0851a557f">tvm::bitwise_or</a></div><div class="ttdeci">PrimExpr bitwise_or(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">take bitwise or of two values </div></div>
-<div class="ttc" id="namespacetvm_html_aa22a313c142a61845ded7fdf77af7046"><div class="ttname"><a href="namespacetvm.html#aa22a313c142a61845ded7fdf77af7046">tvm::max</a></div><div class="ttdeci">PrimExpr max(const PrimExpr &amp;a, double b, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:1229</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a6454dd89e85fc29a7e3b8620df90a6f6"><div class="ttname"><a href="namespacetvm_1_1tir.html#a6454dd89e85fc29a7e3b8620df90a6f6">tvm::tir::foldl</a></div><div class="ttdeci">PrimExpr foldl(FReduce freduce, PrimExpr init_value, const Array&lt; PrimExpr &gt; &amp;values, Span span=Span())</div><div class="ttdoc">Left fold. </div><div class="ttdef"><b>Definition:</b> op.h:1146</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_acbe8f225faaf34c540194921a7ee6a66"><div class="ttname"><a href="namespacetvm_1_1tir.html#acbe8f225faaf34c540194921a7ee6a66">tvm::tir::as_const_int</a></div><div class="ttdeci">const int64_t * as_const_int(const PrimExpr &amp;x)</div><div class="ttdoc">Get x as constant int expression. </div><div class="ttdef"><b>Definition:</b> op.h:985</div></div>
+<div class="ttc" id="namespacetvm_html_aa22a313c142a61845ded7fdf77af7046"><div class="ttname"><a href="namespacetvm.html#aa22a313c142a61845ded7fdf77af7046">tvm::max</a></div><div class="ttdeci">PrimExpr max(const PrimExpr &amp;a, double b, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:1034</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a6454dd89e85fc29a7e3b8620df90a6f6"><div class="ttname"><a href="namespacetvm_1_1tir.html#a6454dd89e85fc29a7e3b8620df90a6f6">tvm::tir::foldl</a></div><div class="ttdeci">PrimExpr foldl(FReduce freduce, PrimExpr init_value, const Array&lt; PrimExpr &gt; &amp;values, Span span=Span())</div><div class="ttdoc">Left fold. </div><div class="ttdef"><b>Definition:</b> op.h:951</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_acbe8f225faaf34c540194921a7ee6a66"><div class="ttname"><a href="namespacetvm_1_1tir.html#acbe8f225faaf34c540194921a7ee6a66">tvm::tir::as_const_int</a></div><div class="ttdeci">const int64_t * as_const_int(const PrimExpr &amp;x)</div><div class="ttdoc">Get x as constant int expression. </div><div class="ttdef"><b>Definition:</b> op.h:790</div></div>
 <div class="ttc" id="namespacetvm_html_a7ffc1cdb3a52b680e4b509395c9a252d"><div class="ttname"><a href="namespacetvm.html#a7ffc1cdb3a52b680e4b509395c9a252d">tvm::greater</a></div><div class="ttdeci">PrimExpr greater(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">greater </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a0db485654cd60d43962f532a1b16026c"><div class="ttname"><a href="namespacetvm_1_1tir.html#a0db485654cd60d43962f532a1b16026c">tvm::tir::MakeConstScalar</a></div><div class="ttdeci">PrimExpr MakeConstScalar(DataType t, ValueType value, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:1103</div></div>
-<div class="ttc" id="namespacetvm_html_a350f9808d53b5fd9ad5c4c50bb76d700"><div class="ttname"><a href="namespacetvm.html#a350f9808d53b5fd9ad5c4c50bb76d700">tvm::atan2</a></div><div class="ttdeci">PrimExpr atan2(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:919</div></div>
-<div class="ttc" id="namespacetvm_html_abde487c0197942c4ebb1b47277b89dac"><div class="ttname"><a href="namespacetvm.html#abde487c0197942c4ebb1b47277b89dac">tvm::operator-</a></div><div class="ttdeci">PrimExpr operator-(PrimExpr a, PrimExpr b)</div><div class="ttdoc">subtraction operator </div></div>
-<div class="ttc" id="tir_2op_8h_html_ab6a17993efa67183ba992dac29284c80"><div class="ttname"><a href="tir_2op_8h.html#ab6a17993efa67183ba992dac29284c80">TVM_DEFINE_LOGICAL_OP_CONST_VAL_OVERLOAD</a></div><div class="ttdeci">#define TVM_DEFINE_LOGICAL_OP_CONST_VAL_OVERLOAD(Name)</div><div class="ttdef"><b>Definition:</b> op.h:1193</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a0db485654cd60d43962f532a1b16026c"><div class="ttname"><a href="namespacetvm_1_1tir.html#a0db485654cd60d43962f532a1b16026c">tvm::tir::MakeConstScalar</a></div><div class="ttdeci">PrimExpr MakeConstScalar(DataType t, ValueType value, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:908</div></div>
+<div class="ttc" id="namespacetvm_html_a350f9808d53b5fd9ad5c4c50bb76d700"><div class="ttname"><a href="namespacetvm.html#a350f9808d53b5fd9ad5c4c50bb76d700">tvm::atan2</a></div><div class="ttdeci">PrimExpr atan2(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:724</div></div>
+<div class="ttc" id="tir_2op_8h_html_ab6a17993efa67183ba992dac29284c80"><div class="ttname"><a href="tir_2op_8h.html#ab6a17993efa67183ba992dac29284c80">TVM_DEFINE_LOGICAL_OP_CONST_VAL_OVERLOAD</a></div><div class="ttdeci">#define TVM_DEFINE_LOGICAL_OP_CONST_VAL_OVERLOAD(Name)</div><div class="ttdef"><b>Definition:</b> op.h:998</div></div>
 <div class="ttc" id="namespacetvm_html_a0447e9aa45f6cab707f6dc9f9281b3f5"><div class="ttname"><a href="namespacetvm.html#a0447e9aa45f6cab707f6dc9f9281b3f5">tvm::GetRuntimeDataType</a></div><div class="ttdeci">runtime::DataType GetRuntimeDataType(const Type &amp;type)</div><div class="ttdoc">Get the implied DataType for storing values with type during runtime. </div></div>
 <div class="ttc" id="classtvm_1_1PointerTypeNode_html"><div class="ttname"><a href="classtvm_1_1PointerTypeNode.html">tvm::PointerTypeNode</a></div><div class="ttdoc">Low-level raw pointer type. </div><div class="ttdef"><b>Definition:</b> type.h:150</div></div>
-<div class="ttc" id="namespacetvm_html_a52a4f309e25bcb51c6038f6e3ee931ec"><div class="ttname"><a href="namespacetvm.html#a52a4f309e25bcb51c6038f6e3ee931ec">tvm::asinh</a></div><div class="ttdeci">PrimExpr asinh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:909</div></div>
-<div class="ttc" id="tir_2op_8h_html_a29826503ae15ba83c6bc8e6cbe218a69"><div class="ttname"><a href="tir_2op_8h.html#a29826503ae15ba83c6bc8e6cbe218a69">TVM_DEFINE_LOGICAL_OP_CONST_VAL_OVERLOAD_SPANNED</a></div><div class="ttdeci">#define TVM_DEFINE_LOGICAL_OP_CONST_VAL_OVERLOAD_SPANNED(Name)</div><div class="ttdef"><b>Definition:</b> op.h:1197</div></div>
+<div class="ttc" id="namespacetvm_html_a52a4f309e25bcb51c6038f6e3ee931ec"><div class="ttname"><a href="namespacetvm.html#a52a4f309e25bcb51c6038f6e3ee931ec">tvm::asinh</a></div><div class="ttdeci">PrimExpr asinh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:714</div></div>
+<div class="ttc" id="tir_2op_8h_html_a29826503ae15ba83c6bc8e6cbe218a69"><div class="ttname"><a href="tir_2op_8h.html#a29826503ae15ba83c6bc8e6cbe218a69">TVM_DEFINE_LOGICAL_OP_CONST_VAL_OVERLOAD_SPANNED</a></div><div class="ttdeci">#define TVM_DEFINE_LOGICAL_OP_CONST_VAL_OVERLOAD_SPANNED(Name)</div><div class="ttdef"><b>Definition:</b> op.h:1002</div></div>
 <div class="ttc" id="namespacetvm_html_a4bfb789a86d95f6241b50fd26f269c28"><div class="ttname"><a href="namespacetvm.html#a4bfb789a86d95f6241b50fd26f269c28">tvm::cast</a></div><div class="ttdeci">PrimExpr cast(const DataType &amp;t, PrimExpr value, Span span=Span())</div><div class="ttdoc">cast value to type. </div></div>
-<div class="ttc" id="namespacetvm_html_ae39f72b12020a4f7ad6b16b66ffdfe1f"><div class="ttname"><a href="namespacetvm.html#ae39f72b12020a4f7ad6b16b66ffdfe1f">tvm::log</a></div><div class="ttdeci">PrimExpr log(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:896</div></div>
+<div class="ttc" id="namespacetvm_html_ae39f72b12020a4f7ad6b16b66ffdfe1f"><div class="ttname"><a href="namespacetvm.html#ae39f72b12020a4f7ad6b16b66ffdfe1f">tvm::log</a></div><div class="ttdeci">PrimExpr log(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:701</div></div>
 <div class="ttc" id="namespacetvm_html_a54d9c399c82d7f384ee93f235496ab64"><div class="ttname"><a href="namespacetvm.html#a54d9c399c82d7f384ee93f235496ab64">tvm::round</a></div><div class="ttdeci">PrimExpr round(PrimExpr x, Span span=Span())</div><div class="ttdoc">Calculate round(x) </div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:275</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:489</div></div>
 <div class="ttc" id="ir_2op_8h_html"><div class="ttname"><a href="ir_2op_8h.html">op.h</a></div><div class="ttdoc">Primitive operators(builtin intrinsics) and registry for them. </div></div>
-<div class="ttc" id="classtvm_1_1FloatImm_html"><div class="ttname"><a href="classtvm_1_1FloatImm.html">tvm::FloatImm</a></div><div class="ttdoc">Managed reference class to FloatImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:350</div></div>
-<div class="ttc" id="namespacetvm_html_a50c4b8aeaf39b357013fc7f62b4a878c"><div class="ttname"><a href="namespacetvm.html#a50c4b8aeaf39b357013fc7f62b4a878c">tvm::exp2</a></div><div class="ttdeci">PrimExpr exp2(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:889</div></div>
-<div class="ttc" id="namespacetvm_html_a2a1269a38e7e3621eb2906a47157106a"><div class="ttname"><a href="namespacetvm.html#a2a1269a38e7e3621eb2906a47157106a">tvm::operator &amp;</a></div><div class="ttdeci">PrimExpr operator &amp;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">take bitwise and of two values </div></div>
+<div class="ttc" id="classtvm_1_1FloatImm_html"><div class="ttname"><a href="classtvm_1_1FloatImm.html">tvm::FloatImm</a></div><div class="ttdoc">Managed reference class to FloatImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:564</div></div>
+<div class="ttc" id="namespacetvm_html_a50c4b8aeaf39b357013fc7f62b4a878c"><div class="ttname"><a href="namespacetvm.html#a50c4b8aeaf39b357013fc7f62b4a878c">tvm::exp2</a></div><div class="ttdeci">PrimExpr exp2(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:694</div></div>
 <div class="ttc" id="namespacetvm_html_a6dfe80d16a7b4f551c87a8901d366d08"><div class="ttname"><a href="namespacetvm.html#a6dfe80d16a7b4f551c87a8901d366d08">tvm::less_equal</a></div><div class="ttdeci">PrimExpr less_equal(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">less_equal </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a8dd84303a9864b5b366835fa628a7824"><div class="ttname"><a href="namespacetvm_1_1tir.html#a8dd84303a9864b5b366835fa628a7824">tvm::tir::const_true</a></div><div class="ttdeci">PrimExpr const_true(int lanes=1, Span span=Span())</div><div class="ttdoc">Make a constant true expression. </div><div class="ttdef"><b>Definition:</b> op.h:967</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a8dd84303a9864b5b366835fa628a7824"><div class="ttname"><a href="namespacetvm_1_1tir.html#a8dd84303a9864b5b366835fa628a7824">tvm::tir::const_true</a></div><div class="ttdeci">PrimExpr const_true(int lanes=1, Span span=Span())</div><div class="ttdoc">Make a constant true expression. </div><div class="ttdef"><b>Definition:</b> op.h:772</div></div>
 <div class="ttc" id="classtvm_1_1Span_html"><div class="ttname"><a href="classtvm_1_1Span.html">tvm::Span</a></div><div class="ttdef"><b>Definition:</b> span.h:115</div></div>
 <div class="ttc" id="stmt_8h_html"><div class="ttname"><a href="stmt_8h.html">stmt.h</a></div><div class="ttdoc">TIR statements. </div></div>
 <div class="ttc" id="namespacetvm_html_a8683adb542beba8ecc69354e50d62ef6"><div class="ttname"><a href="namespacetvm.html#a8683adb542beba8ecc69354e50d62ef6">tvm::floormod</a></div><div class="ttdeci">PrimExpr floormod(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute the remainder of floordiv </div></div>
-<div class="ttc" id="namespacetvm_html_a51dc569142bf8ce8ea55f73029d3807d"><div class="ttname"><a href="namespacetvm.html#a51dc569142bf8ce8ea55f73029d3807d">tvm::operator/=</a></div><div class="ttdeci">PrimExpr operator/=(const PrimExpr &amp;a, const TB &amp;b)</div><div class="ttdef"><b>Definition:</b> op.h:1290</div></div>
+<div class="ttc" id="namespacetvm_html_a51dc569142bf8ce8ea55f73029d3807d"><div class="ttname"><a href="namespacetvm.html#a51dc569142bf8ce8ea55f73029d3807d">tvm::operator/=</a></div><div class="ttdeci">PrimExpr operator/=(const PrimExpr &amp;a, const TB &amp;b)</div><div class="ttdef"><b>Definition:</b> op.h:1095</div></div>
 <div class="ttc" id="namespacetvm_html_a16f9cd9219b505e2cc05c5a7558ac61f"><div class="ttname"><a href="namespacetvm.html#a16f9cd9219b505e2cc05c5a7558ac61f">tvm::div</a></div><div class="ttdeci">PrimExpr div(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute division in C semantics. </div></div>
-<div class="ttc" id="namespacetvm_html_a28e456d33229a628a312110db8d45b44"><div class="ttname"><a href="namespacetvm.html#a28e456d33229a628a312110db8d45b44">tvm::hypot</a></div><div class="ttdeci">PrimExpr hypot(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:922</div></div>
-<div class="ttc" id="namespacetvm_html_ab354bf1270121abea71fade83f13b0b0"><div class="ttname"><a href="namespacetvm.html#ab354bf1270121abea71fade83f13b0b0">tvm::operator!</a></div><div class="ttdeci">PrimExpr operator!(PrimExpr a)</div><div class="ttdoc">not </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a875c28f423ab96ae8f0d21f1263abe14"><div class="ttname"><a href="namespacetvm_1_1tir.html#a875c28f423ab96ae8f0d21f1263abe14">tvm::tir::const_false</a></div><div class="ttdeci">PrimExpr const_false(int lanes=1, Span span=Span())</div><div class="ttdoc">Make a constant false expression. </div><div class="ttdef"><b>Definition:</b> op.h:976</div></div>
+<div class="ttc" id="namespacetvm_html_a28e456d33229a628a312110db8d45b44"><div class="ttname"><a href="namespacetvm.html#a28e456d33229a628a312110db8d45b44">tvm::hypot</a></div><div class="ttdeci">PrimExpr hypot(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:727</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a875c28f423ab96ae8f0d21f1263abe14"><div class="ttname"><a href="namespacetvm_1_1tir.html#a875c28f423ab96ae8f0d21f1263abe14">tvm::tir::const_false</a></div><div class="ttdeci">PrimExpr const_false(int lanes=1, Span span=Span())</div><div class="ttdoc">Make a constant false expression. </div><div class="ttdef"><b>Definition:</b> op.h:781</div></div>
 <div class="ttc" id="ir_2type_8h_html"><div class="ttname"><a href="ir_2type_8h.html">type.h</a></div><div class="ttdoc">IR/AST nodes for the unified type system in TVM. </div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Broadcast_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Broadcast.html">tvm::tir::Broadcast</a></div><div class="ttdoc">Managed reference to BroadcastNode. </div><div class="ttdef"><b>Definition:</b> expr.h:855</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a17d8d5ad92691f9e18e3e0ae8ef69e4f"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a17d8d5ad92691f9e18e3e0ae8ef69e4f">tvm::runtime::ObjectRef::defined</a></div><div class="ttdeci">bool defined() const</div><div class="ttdef"><b>Definition:</b> object.h:544</div></div>
@@ -151,86 +144,76 @@ $(function() {
 <div class="ttc" id="namespacetvm_html_a34084606675cd2c73c6b0f10e1618280"><div class="ttname"><a href="namespacetvm.html#a34084606675cd2c73c6b0f10e1618280">tvm::reinterpret</a></div><div class="ttdeci">PrimExpr reinterpret(const DataType &amp;t, PrimExpr value, Span span=Span())</div><div class="ttdoc">perform reinterpret cast value to type. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
 <div class="ttc" id="namespacetvm_html_a8f30aa0685ca52f846843e76a1ad1dc7"><div class="ttname"><a href="namespacetvm.html#a8f30aa0685ca52f846843e76a1ad1dc7">tvm::indexdiv</a></div><div class="ttdeci">PrimExpr indexdiv(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute floor(a / b) where a and b are non-negative. </div></div>
-<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:304</div></div>
+<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:518</div></div>
 <div class="ttc" id="namespacetvm_html_ac788f9eb54a8971596779537afc6c896"><div class="ttname"><a href="namespacetvm.html#ac788f9eb54a8971596779537afc6c896">tvm::q_multiply_shift</a></div><div class="ttdeci">PrimExpr q_multiply_shift(PrimExpr x, PrimExpr y, PrimExpr q, PrimExpr s, Span span=Span())</div><div class="ttdoc">Execute a multiplication between two Q-numbers x and y followed by a right shift s. The mathematical expression is: </div></div>
-<div class="ttc" id="namespacetvm_html_af682776c3609284f1bc3ea436e21a67a"><div class="ttname"><a href="namespacetvm.html#af682776c3609284f1bc3ea436e21a67a">tvm::operator&lt;&lt;</a></div><div class="ttdeci">PrimExpr operator&lt;&lt;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">left shift operator </div></div>
-<div class="ttc" id="tir_2op_8h_html_a032e3ae6824990aad98b8992f90a83c9"><div class="ttname"><a href="tir_2op_8h.html#a032e3ae6824990aad98b8992f90a83c9">TVM_DECLARE_INTRIN_UNARY</a></div><div class="ttdeci">#define TVM_DECLARE_INTRIN_UNARY(OpName)</div><div class="ttdef"><b>Definition:</b> op.h:874</div></div>
-<div class="ttc" id="namespacetvm_html_abd7d1b3232218b25e2e0cf6ef699a65f"><div class="ttname"><a href="namespacetvm.html#abd7d1b3232218b25e2e0cf6ef699a65f">tvm::operator^</a></div><div class="ttdeci">PrimExpr operator^(PrimExpr a, PrimExpr b)</div><div class="ttdoc">take bitwise xor of two values </div></div>
+<div class="ttc" id="tir_2op_8h_html_a032e3ae6824990aad98b8992f90a83c9"><div class="ttname"><a href="tir_2op_8h.html#a032e3ae6824990aad98b8992f90a83c9">TVM_DECLARE_INTRIN_UNARY</a></div><div class="ttdeci">#define TVM_DECLARE_INTRIN_UNARY(OpName)</div><div class="ttdef"><b>Definition:</b> op.h:679</div></div>
 <div class="ttc" id="namespacetvm_html_a096aa20c0df975d089231b2c6fda2e61"><div class="ttname"><a href="namespacetvm.html#a096aa20c0df975d089231b2c6fda2e61">tvm::isfinite</a></div><div class="ttdeci">PrimExpr isfinite(PrimExpr x, Span span=Span())</div><div class="ttdoc">Check if x is finite. </div></div>
-<div class="ttc" id="tir_2op_8h_html_a0ad19625381aae20ca7a930260089c47"><div class="ttname"><a href="tir_2op_8h.html#a0ad19625381aae20ca7a930260089c47">TVM_DEFINE_INT_OP_CONST_VAL_OVERLOAD</a></div><div class="ttdeci">#define TVM_DEFINE_INT_OP_CONST_VAL_OVERLOAD(Name)</div><div class="ttdef"><b>Definition:</b> op.h:1205</div></div>
+<div class="ttc" id="tir_2op_8h_html_a0ad19625381aae20ca7a930260089c47"><div class="ttname"><a href="tir_2op_8h.html#a0ad19625381aae20ca7a930260089c47">TVM_DEFINE_INT_OP_CONST_VAL_OVERLOAD</a></div><div class="ttdeci">#define TVM_DEFINE_INT_OP_CONST_VAL_OVERLOAD(Name)</div><div class="ttdef"><b>Definition:</b> op.h:1010</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1BroadcastNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1BroadcastNode.html">tvm::tir::BroadcastNode</a></div><div class="ttdoc">Create a vector where all the elements are value. </div><div class="ttdef"><b>Definition:</b> expr.h:823</div></div>
-<div class="ttc" id="namespacetvm_html_aeeef6fde2a1352eae8abddd994c657b7"><div class="ttname"><a href="namespacetvm.html#aeeef6fde2a1352eae8abddd994c657b7">tvm::clz</a></div><div class="ttdeci">PrimExpr clz(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:911</div></div>
+<div class="ttc" id="namespacetvm_html_aeeef6fde2a1352eae8abddd994c657b7"><div class="ttname"><a href="namespacetvm.html#aeeef6fde2a1352eae8abddd994c657b7">tvm::clz</a></div><div class="ttdeci">PrimExpr clz(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:716</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Stmt_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Stmt.html">tvm::tir::Stmt</a></div><div class="ttdoc">Container of all statements. </div><div class="ttdef"><b>Definition:</b> stmt.h:57</div></div>
-<div class="ttc" id="namespacetvm_html_a41c8855d1e4f7ea1d01e42e6c214f877"><div class="ttname"><a href="namespacetvm.html#a41c8855d1e4f7ea1d01e42e6c214f877">tvm::cosh</a></div><div class="ttdeci">PrimExpr cosh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:902</div></div>
+<div class="ttc" id="namespacetvm_html_a41c8855d1e4f7ea1d01e42e6c214f877"><div class="ttname"><a href="namespacetvm.html#a41c8855d1e4f7ea1d01e42e6c214f877">tvm::cosh</a></div><div class="ttdeci">PrimExpr cosh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:707</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_a0de5803abe309ca66c23adb15c565afb"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#a0de5803abe309ca66c23adb15c565afb">tvm::runtime::DataType::is_uint</a></div><div class="ttdeci">bool is_uint() const</div><div class="ttdef"><b>Definition:</b> data_type.h:101</div></div>
 <div class="ttc" id="namespacetvm_html_a0df5ca82d2c566f628ebb2f1e84a3fcb"><div class="ttname"><a href="namespacetvm.html#a0df5ca82d2c566f628ebb2f1e84a3fcb">tvm::max</a></div><div class="ttdeci">PrimExpr max(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">take maximum of two values </div></div>
-<div class="ttc" id="tir_2op_8h_html_aabcf618a12e97c38fccecf7351392154"><div class="ttname"><a href="tir_2op_8h.html#aabcf618a12e97c38fccecf7351392154">TVM_DEFINE_BINOP_CONST_VAL_OVERLOAD</a></div><div class="ttdeci">#define TVM_DEFINE_BINOP_CONST_VAL_OVERLOAD(Name)</div><div class="ttdef"><b>Definition:</b> op.h:1163</div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html_a81f4c116ffb5931fdd64639eacad415d"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#a81f4c116ffb5931fdd64639eacad415d">tvm::IntImmNode::value</a></div><div class="ttdeci">int64_t value</div><div class="ttdoc">the Internal value. </div><div class="ttdef"><b>Definition:</b> expr.h:278</div></div>
+<div class="ttc" id="tir_2op_8h_html_aabcf618a12e97c38fccecf7351392154"><div class="ttname"><a href="tir_2op_8h.html#aabcf618a12e97c38fccecf7351392154">TVM_DEFINE_BINOP_CONST_VAL_OVERLOAD</a></div><div class="ttdeci">#define TVM_DEFINE_BINOP_CONST_VAL_OVERLOAD(Name)</div><div class="ttdef"><b>Definition:</b> op.h:968</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html_a81f4c116ffb5931fdd64639eacad415d"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#a81f4c116ffb5931fdd64639eacad415d">tvm::IntImmNode::value</a></div><div class="ttdeci">int64_t value</div><div class="ttdoc">the Internal value. </div><div class="ttdef"><b>Definition:</b> expr.h:492</div></div>
 <div class="ttc" id="namespacetvm_html_a15f25703cfce73c75cb4cd33c74ea8f0"><div class="ttname"><a href="namespacetvm.html#a15f25703cfce73c75cb4cd33c74ea8f0">tvm::shapediv</a></div><div class="ttdeci">PrimExpr shapediv(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute ceil(a / b) where a and b are non-negative. </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_aed3f57cf8d1c3546f075701898c5b70f"><div class="ttname"><a href="namespacetvm_1_1tir.html#aed3f57cf8d1c3546f075701898c5b70f">tvm::tir::make_zero</a></div><div class="ttdeci">PrimExpr make_zero(DataType t, Span span=Span())</div><div class="ttdoc">Make a const zero expr. </div><div class="ttdef"><b>Definition:</b> op.h:1138</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_aed3f57cf8d1c3546f075701898c5b70f"><div class="ttname"><a href="namespacetvm_1_1tir.html#aed3f57cf8d1c3546f075701898c5b70f">tvm::tir::make_zero</a></div><div class="ttdeci">PrimExpr make_zero(DataType t, Span span=Span())</div><div class="ttdoc">Make a const zero expr. </div><div class="ttdef"><b>Definition:</b> op.h:943</div></div>
 <div class="ttc" id="namespacetvm_html_a3f6d8fba545c2944efc83b57e6190459"><div class="ttname"><a href="namespacetvm.html#a3f6d8fba545c2944efc83b57e6190459">tvm::bitwise_neg</a></div><div class="ttdeci">PrimExpr bitwise_neg(PrimExpr a, Span span=Span())</div><div class="ttdoc">take bitwise negation of two values </div></div>
 <div class="ttc" id="namespacetvm_html_a5efd9942cdee5a56cfc438ba523c04f0"><div class="ttname"><a href="namespacetvm.html#a5efd9942cdee5a56cfc438ba523c04f0">tvm::any</a></div><div class="ttdeci">PrimExpr any(PrimExpr source, Array&lt; tir::IterVar &gt; axis, Array&lt; PrimExpr &gt; init={}, Span span=Span())</div><div class="ttdoc">logical Or of of source expression over axis </div></div>
-<div class="ttc" id="namespacetvm_html_a139870d327497d548e2ef8bddba2f114"><div class="ttname"><a href="namespacetvm.html#a139870d327497d548e2ef8bddba2f114">tvm::erf</a></div><div class="ttdeci">PrimExpr erf(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:891</div></div>
+<div class="ttc" id="namespacetvm_html_a139870d327497d548e2ef8bddba2f114"><div class="ttname"><a href="namespacetvm.html#a139870d327497d548e2ef8bddba2f114">tvm::erf</a></div><div class="ttdeci">PrimExpr erf(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:696</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1EvaluateNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1EvaluateNode.html">tvm::tir::EvaluateNode</a></div><div class="ttdoc">Evaluates an expression. This is mostly used for putting a Call node into Stmt. </div><div class="ttdef"><b>Definition:</b> stmt.h:834</div></div>
-<div class="ttc" id="namespacetvm_html_a1ce1eb32fc9d76ebe5a6b8d185024d41"><div class="ttname"><a href="namespacetvm.html#a1ce1eb32fc9d76ebe5a6b8d185024d41">tvm::operator&gt;&gt;</a></div><div class="ttdeci">PrimExpr operator&gt;&gt;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">right shift operator </div></div>
-<div class="ttc" id="namespacetvm_html_a96d86ba91e4855c84879ba886465cacf"><div class="ttname"><a href="namespacetvm.html#a96d86ba91e4855c84879ba886465cacf">tvm::nextafter</a></div><div class="ttdeci">PrimExpr nextafter(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:920</div></div>
+<div class="ttc" id="namespacetvm_html_a96d86ba91e4855c84879ba886465cacf"><div class="ttname"><a href="namespacetvm.html#a96d86ba91e4855c84879ba886465cacf">tvm::nextafter</a></div><div class="ttdeci">PrimExpr nextafter(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:725</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_a7a67295643b82bbe37cf36e6f69e8323"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#a7a67295643b82bbe37cf36e6f69e8323">tvm::runtime::DataType::lanes</a></div><div class="ttdeci">int lanes() const</div><div class="ttdef"><b>Definition:</b> data_type.h:87</div></div>
 <div class="ttc" id="namespacetvm_html_a27d5567b95675d383c4675fdcd85346c"><div class="ttname"><a href="namespacetvm.html#a27d5567b95675d383c4675fdcd85346c">tvm::logical_and</a></div><div class="ttdeci">PrimExpr logical_and(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">and </div></div>
 <div class="ttc" id="namespacetvm_html_a2428ea0e23bd9f7218aebd066bb2cd88"><div class="ttname"><a href="namespacetvm.html#a2428ea0e23bd9f7218aebd066bb2cd88">tvm::truncmod</a></div><div class="ttdeci">PrimExpr truncmod(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute the remainder of truncdiv </div></div>
 <div class="ttc" id="namespacetvm_1_1relay_html_a661d95f170bca230773914caeef3fe52"><div class="ttname"><a href="namespacetvm_1_1relay.html#a661d95f170bca230773914caeef3fe52">tvm::relay::Type</a></div><div class="ttdeci">tvm::Type Type</div><div class="ttdef"><b>Definition:</b> type.h:47</div></div>
-<div class="ttc" id="namespacetvm_html_a8b12d0bb7c343e149ae0631a2577547b"><div class="ttname"><a href="namespacetvm.html#a8b12d0bb7c343e149ae0631a2577547b">tvm::operator==</a></div><div class="ttdeci">bool operator==(const Bool &amp;a, bool b)</div><div class="ttdef"><b>Definition:</b> expr.h:390</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_acfe8fe9c3873fdec74c9a7b03161766f"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#acfe8fe9c3873fdec74c9a7b03161766f">tvm::runtime::DataType::is_bfloat16</a></div><div class="ttdeci">bool is_bfloat16() const</div><div class="ttdef"><b>Definition:</b> data_type.h:97</div></div>
-<div class="ttc" id="namespacetvm_html_a5530417da455bd46f5dc55f27d69bcdf"><div class="ttname"><a href="namespacetvm.html#a5530417da455bd46f5dc55f27d69bcdf">tvm::operator&gt;=</a></div><div class="ttdeci">PrimExpr operator&gt;=(PrimExpr a, PrimExpr b)</div><div class="ttdoc">greater_equal </div></div>
 <div class="ttc" id="namespacetvm_html_af347f10e3572adb2d74ba4a53777db2b"><div class="ttname"><a href="namespacetvm.html#af347f10e3572adb2d74ba4a53777db2b">tvm::floordiv</a></div><div class="ttdeci">PrimExpr floordiv(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute floor(a / b) </div></div>
-<div class="ttc" id="namespacetvm_html_aa8e1cc91eb14b427e3018836d82e15e6"><div class="ttname"><a href="namespacetvm.html#aa8e1cc91eb14b427e3018836d82e15e6">tvm::acos</a></div><div class="ttdeci">PrimExpr acos(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:906</div></div>
+<div class="ttc" id="namespacetvm_html_aa8e1cc91eb14b427e3018836d82e15e6"><div class="ttname"><a href="namespacetvm.html#aa8e1cc91eb14b427e3018836d82e15e6">tvm::acos</a></div><div class="ttdeci">PrimExpr acos(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:711</div></div>
 <div class="ttc" id="namespacetvm_html_a48fb9755f38ffcfcd03592a47ffbbd14"><div class="ttname"><a href="namespacetvm.html#a48fb9755f38ffcfcd03592a47ffbbd14">tvm::GetType</a></div><div class="ttdeci">Type GetType(const PrimExpr &amp;expr)</div><div class="ttdoc">Get the type of the expression under the unified type system. </div></div>
-<div class="ttc" id="namespacetvm_html_a5c5034de2993b9130b7bd9d593a11bb5"><div class="ttname"><a href="namespacetvm.html#a5c5034de2993b9130b7bd9d593a11bb5">tvm::operator*</a></div><div class="ttdeci">PrimExpr operator*(PrimExpr a, PrimExpr b)</div><div class="ttdoc">multiplication operator </div></div>
-<div class="ttc" id="namespacetvm_html_ac1b3a94a13d11c02d7e79cad2638e74a"><div class="ttname"><a href="namespacetvm.html#ac1b3a94a13d11c02d7e79cad2638e74a">tvm::log2</a></div><div class="ttdeci">PrimExpr log2(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:897</div></div>
+<div class="ttc" id="namespacetvm_html_ac1b3a94a13d11c02d7e79cad2638e74a"><div class="ttname"><a href="namespacetvm.html#ac1b3a94a13d11c02d7e79cad2638e74a">tvm::log2</a></div><div class="ttdeci">PrimExpr log2(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:702</div></div>
 <div class="ttc" id="namespacetvm_html_adeeaff4fb29f75a9da8ff4d67723c693"><div class="ttname"><a href="namespacetvm.html#adeeaff4fb29f75a9da8ff4d67723c693">tvm::all</a></div><div class="ttdeci">PrimExpr all(PrimExpr source, Array&lt; tir::IterVar &gt; axis, Array&lt; PrimExpr &gt; init={}, Span span=Span())</div><div class="ttdoc">logical And of of source expression over axis </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a48cd6ae7623f42cddbb05cc008c33711"><div class="ttname"><a href="namespacetvm_1_1tir.html#a48cd6ae7623f42cddbb05cc008c33711">tvm::tir::IsPointerType</a></div><div class="ttdeci">bool IsPointerType(const Type &amp;type, const DataType &amp;element_type)</div><div class="ttdoc">Check if type is a pointer to a runtime element type. </div><div class="ttdef"><b>Definition:</b> op.h:933</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a5c414d5e54c099ad7287be302aac8f02"><div class="ttname"><a href="namespacetvm_1_1tir.html#a5c414d5e54c099ad7287be302aac8f02">tvm::tir::is_const_int</a></div><div class="ttdeci">bool is_const_int(const PrimExpr &amp;x, int64_t value)</div><div class="ttdoc">Check whether x is a constant integer expression. </div><div class="ttdef"><b>Definition:</b> op.h:1086</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a48cd6ae7623f42cddbb05cc008c33711"><div class="ttname"><a href="namespacetvm_1_1tir.html#a48cd6ae7623f42cddbb05cc008c33711">tvm::tir::IsPointerType</a></div><div class="ttdeci">bool IsPointerType(const Type &amp;type, const DataType &amp;element_type)</div><div class="ttdoc">Check if type is a pointer to a runtime element type. </div><div class="ttdef"><b>Definition:</b> op.h:738</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a5c414d5e54c099ad7287be302aac8f02"><div class="ttname"><a href="namespacetvm_1_1tir.html#a5c414d5e54c099ad7287be302aac8f02">tvm::tir::is_const_int</a></div><div class="ttdeci">bool is_const_int(const PrimExpr &amp;x, int64_t value)</div><div class="ttdoc">Check whether x is a constant integer expression. </div><div class="ttdef"><b>Definition:</b> op.h:891</div></div>
 <div class="ttc" id="namespacetvm_html_ac3932d85fd31819eae6a80841296af51"><div class="ttname"><a href="namespacetvm.html#ac3932d85fd31819eae6a80841296af51">tvm::not_equal</a></div><div class="ttdeci">PrimExpr not_equal(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">not_equal </div></div>
-<div class="ttc" id="namespacetvm_html_afdd8659490e81bdc0f2d42b77b882d30"><div class="ttname"><a href="namespacetvm.html#afdd8659490e81bdc0f2d42b77b882d30">tvm::cos</a></div><div class="ttdeci">PrimExpr cos(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:901</div></div>
-<div class="ttc" id="namespacetvm_html_af38d8633e3508033faa7bd60d8232bfe"><div class="ttname"><a href="namespacetvm.html#af38d8633e3508033faa7bd60d8232bfe">tvm::acosh</a></div><div class="ttdeci">PrimExpr acosh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:908</div></div>
-<div class="ttc" id="namespacetvm_html_abf978d3e6abd0e3754b853dc4fa9869e"><div class="ttname"><a href="namespacetvm.html#abf978d3e6abd0e3754b853dc4fa9869e">tvm::sqrt</a></div><div class="ttdeci">PrimExpr sqrt(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:894</div></div>
-<div class="ttc" id="namespacetvm_html_a12c5457301d8a2c03a2ba1163edd7cee"><div class="ttname"><a href="namespacetvm.html#a12c5457301d8a2c03a2ba1163edd7cee">tvm::tanh</a></div><div class="ttdeci">PrimExpr tanh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:892</div></div>
+<div class="ttc" id="namespacetvm_html_afdd8659490e81bdc0f2d42b77b882d30"><div class="ttname"><a href="namespacetvm.html#afdd8659490e81bdc0f2d42b77b882d30">tvm::cos</a></div><div class="ttdeci">PrimExpr cos(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:706</div></div>
+<div class="ttc" id="namespacetvm_html_af38d8633e3508033faa7bd60d8232bfe"><div class="ttname"><a href="namespacetvm.html#af38d8633e3508033faa7bd60d8232bfe">tvm::acosh</a></div><div class="ttdeci">PrimExpr acosh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:713</div></div>
+<div class="ttc" id="namespacetvm_html_abf978d3e6abd0e3754b853dc4fa9869e"><div class="ttname"><a href="namespacetvm.html#abf978d3e6abd0e3754b853dc4fa9869e">tvm::sqrt</a></div><div class="ttdeci">PrimExpr sqrt(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:699</div></div>
+<div class="ttc" id="namespacetvm_html_a12c5457301d8a2c03a2ba1163edd7cee"><div class="ttname"><a href="namespacetvm.html#a12c5457301d8a2c03a2ba1163edd7cee">tvm::tanh</a></div><div class="ttdeci">PrimExpr tanh(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:697</div></div>
 <div class="ttc" id="namespacetvm_html_a8934beb918da0e451d3aab7ccbcd9859"><div class="ttname"><a href="namespacetvm.html#a8934beb918da0e451d3aab7ccbcd9859">tvm::infinity</a></div><div class="ttdeci">PrimExpr infinity(const DataType &amp;dtype, Span span=Span())</div></div>
 <div class="ttc" id="namespacetvm_html_a98a791851ba1a7631e50587ae370b3b8"><div class="ttname"><a href="namespacetvm.html#a98a791851ba1a7631e50587ae370b3b8">tvm::LargeUIntImm</a></div><div class="ttdeci">PrimExpr LargeUIntImm(DataType dtype, int64_t low, int64_t high, Span span=Span())</div><div class="ttdoc">Construct a large uint constant by its low 32 bits and high 32bits. </div></div>
 <div class="ttc" id="namespacetvm_html_a5a8143fd484af0da57222d6ff0da6323"><div class="ttname"><a href="namespacetvm.html#a5a8143fd484af0da57222d6ff0da6323">tvm::GetTypeFromRuntimeDataType</a></div><div class="ttdeci">Type GetTypeFromRuntimeDataType(const DataType &amp;dtype)</div><div class="ttdoc">Get the type corresponding to DataType. </div></div>
 <div class="ttc" id="namespacetvm_html_a4f1398024c0af23699447ef910b654b8"><div class="ttname"><a href="namespacetvm.html#a4f1398024c0af23699447ef910b654b8">tvm::max_value</a></div><div class="ttdeci">PrimExpr max_value(const DataType &amp;dtype, Span span=Span())</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a5b96c80ce43c8276e39c15787d997651"><div class="ttname"><a href="namespacetvm_1_1tir.html#a5b96c80ce43c8276e39c15787d997651">tvm::tir::is_const_number</a></div><div class="ttdeci">bool is_const_number(const PrimExpr &amp;x)</div><div class="ttdoc">Check whether x is an integer/float constant. </div><div class="ttdef"><b>Definition:</b> op.h:1065</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a5b96c80ce43c8276e39c15787d997651"><div class="ttname"><a href="namespacetvm_1_1tir.html#a5b96c80ce43c8276e39c15787d997651">tvm::tir::is_const_number</a></div><div class="ttdeci">bool is_const_number(const PrimExpr &amp;x)</div><div class="ttdoc">Check whether x is an integer/float constant. </div><div class="ttdef"><b>Definition:</b> op.h:870</div></div>
 <div class="ttc" id="namespacetvm_html_a5472f967969aebee254e8e78f2396436"><div class="ttname"><a href="namespacetvm.html#a5472f967969aebee254e8e78f2396436">tvm::trunc</a></div><div class="ttdeci">PrimExpr trunc(PrimExpr x, Span span=Span())</div><div class="ttdoc">Calculate trunc(x) </div></div>
-<div class="ttc" id="tir_2op_8h_html_ac211367ff4e2382caf322a3903f8c629"><div class="ttname"><a href="tir_2op_8h.html#ac211367ff4e2382caf322a3903f8c629">TVM_DEFINE_INT_OP_CONST_VAL_OVERLOAD_SPANNED</a></div><div class="ttdeci">#define TVM_DEFINE_INT_OP_CONST_VAL_OVERLOAD_SPANNED(Name)</div><div class="ttdef"><b>Definition:</b> op.h:1211</div></div>
+<div class="ttc" id="tir_2op_8h_html_ac211367ff4e2382caf322a3903f8c629"><div class="ttname"><a href="tir_2op_8h.html#ac211367ff4e2382caf322a3903f8c629">TVM_DEFINE_INT_OP_CONST_VAL_OVERLOAD_SPANNED</a></div><div class="ttdeci">#define TVM_DEFINE_INT_OP_CONST_VAL_OVERLOAD_SPANNED(Name)</div><div class="ttdef"><b>Definition:</b> op.h:1016</div></div>
 <div class="ttc" id="namespacetvm_html_a0da40d3e210aa3b38a17982a7b7866b8"><div class="ttname"><a href="namespacetvm.html#a0da40d3e210aa3b38a17982a7b7866b8">tvm::ret</a></div><div class="ttdeci">PrimExpr ret(PrimExpr value, Span span=Span())</div><div class="ttdoc">Return the value. </div></div>
-<div class="ttc" id="namespacetvm_html_a532ceddde4b8c713b0b1d7e737fcf5fb"><div class="ttname"><a href="namespacetvm.html#a532ceddde4b8c713b0b1d7e737fcf5fb">tvm::sin</a></div><div class="ttdeci">PrimExpr sin(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:903</div></div>
+<div class="ttc" id="namespacetvm_html_a532ceddde4b8c713b0b1d7e737fcf5fb"><div class="ttname"><a href="namespacetvm.html#a532ceddde4b8c713b0b1d7e737fcf5fb">tvm::sin</a></div><div class="ttdeci">PrimExpr sin(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:708</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_af0e52ef3c0d8e11bf493d5163033cd0d"><div class="ttname"><a href="namespacetvm_1_1topi.html#af0e52ef3c0d8e11bf493d5163033cd0d">tvm::topi::FReduce</a></div><div class="ttdeci">std::function&lt; PrimExpr(PrimExpr source, const Array&lt; IterVar &gt; &amp;axis, Array&lt; PrimExpr &gt; init, Span span)&gt; FReduce</div><div class="ttdoc">The operation to use for CommReduce. </div><div class="ttdef"><b>Definition:</b> reduction.h:47</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a51d552441331effb387b7c8fb241c454"><div class="ttname"><a href="namespacetvm_1_1tir.html#a51d552441331effb387b7c8fb241c454">tvm::tir::is_negative_const</a></div><div class="ttdeci">bool is_negative_const(const PrimExpr &amp;a)</div><div class="ttdef"><b>Definition:</b> op.h:1081</div></div>
-<div class="ttc" id="namespacetvm_html_a31e7a3e4a160a1d048e3ba741966f1a8"><div class="ttname"><a href="namespacetvm.html#a31e7a3e4a160a1d048e3ba741966f1a8">tvm::DivAmbiguityError</a></div><div class="ttdeci">void DivAmbiguityError(const TA &amp;a)</div><div class="ttdoc">Helper function to raise a compiler error about division ambiguity. </div><div class="ttdef"><b>Definition:</b> op.h:1268</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a51d552441331effb387b7c8fb241c454"><div class="ttname"><a href="namespacetvm_1_1tir.html#a51d552441331effb387b7c8fb241c454">tvm::tir::is_negative_const</a></div><div class="ttdeci">bool is_negative_const(const PrimExpr &amp;a)</div><div class="ttdef"><b>Definition:</b> op.h:886</div></div>
+<div class="ttc" id="namespacetvm_html_a31e7a3e4a160a1d048e3ba741966f1a8"><div class="ttname"><a href="namespacetvm.html#a31e7a3e4a160a1d048e3ba741966f1a8">tvm::DivAmbiguityError</a></div><div class="ttdeci">void DivAmbiguityError(const TA &amp;a)</div><div class="ttdoc">Helper function to raise a compiler error about division ambiguity. </div><div class="ttdef"><b>Definition:</b> op.h:1073</div></div>
 <div class="ttc" id="namespacetvm_html_a18256ba1213ce5ff3cf8037a314354b7"><div class="ttname"><a href="namespacetvm.html#a18256ba1213ce5ff3cf8037a314354b7">tvm::operator/</a></div><div class="ttdeci">PrimExpr operator/(PrimExpr a, PrimExpr b)</div><div class="ttdoc">division operator </div></div>
 <div class="ttc" id="namespacetvm_html_aaa28e92b677086d89ebfb77204bf92a2"><div class="ttname"><a href="namespacetvm.html#aaa28e92b677086d89ebfb77204bf92a2">tvm::mul</a></div><div class="ttdeci">PrimExpr mul(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">multiplication operator </div></div>
-<div class="ttc" id="namespacetvm_html_a598f8139c469abc4066dbdd0a0a0845d"><div class="ttname"><a href="namespacetvm.html#a598f8139c469abc4066dbdd0a0a0845d">tvm::operator&lt;=</a></div><div class="ttdeci">PrimExpr operator&lt;=(PrimExpr a, PrimExpr b)</div><div class="ttdoc">less_equal </div></div>
-<div class="ttc" id="namespacetvm_html_a69f67f2d38656a8e663af0912d00cb51"><div class="ttname"><a href="namespacetvm.html#a69f67f2d38656a8e663af0912d00cb51">tvm::copysign</a></div><div class="ttdeci">PrimExpr copysign(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:921</div></div>
+<div class="ttc" id="namespacetvm_html_a69f67f2d38656a8e663af0912d00cb51"><div class="ttname"><a href="namespacetvm.html#a69f67f2d38656a8e663af0912d00cb51">tvm::copysign</a></div><div class="ttdeci">PrimExpr copysign(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:726</div></div>
 <div class="ttc" id="classtvm_1_1Type_html"><div class="ttname"><a href="classtvm_1_1Type.html">tvm::Type</a></div><div class="ttdoc">Managed reference to TypeNode. </div><div class="ttdef"><b>Definition:</b> type.h:93</div></div>
 <div class="ttc" id="namespacetvm_html_a62955df1df48917116efe39d4cd18fec"><div class="ttname"><a href="namespacetvm.html#a62955df1df48917116efe39d4cd18fec">tvm::logical_not</a></div><div class="ttdeci">PrimExpr logical_not(PrimExpr a, Span span=Span())</div><div class="ttdoc">not </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a782dc226f8b2b537efdc56b1f76351d1"><div class="ttname"><a href="namespacetvm_1_1tir.html#a782dc226f8b2b537efdc56b1f76351d1">tvm::tir::is_positive_const</a></div><div class="ttdeci">bool is_positive_const(const PrimExpr &amp;a)</div><div class="ttdef"><b>Definition:</b> op.h:1076</div></div>
-<div class="ttc" id="namespacetvm_html_ab25738e50b37cd07b2d171ca74ba9321"><div class="ttname"><a href="namespacetvm.html#ab25738e50b37cd07b2d171ca74ba9321">tvm::operator%</a></div><div class="ttdeci">PrimExpr operator%(const PrimExpr &amp;a, const TB &amp;b)</div><div class="ttdef"><b>Definition:</b> op.h:1296</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a782dc226f8b2b537efdc56b1f76351d1"><div class="ttname"><a href="namespacetvm_1_1tir.html#a782dc226f8b2b537efdc56b1f76351d1">tvm::tir::is_positive_const</a></div><div class="ttdeci">bool is_positive_const(const PrimExpr &amp;a)</div><div class="ttdef"><b>Definition:</b> op.h:881</div></div>
+<div class="ttc" id="namespacetvm_html_ab25738e50b37cd07b2d171ca74ba9321"><div class="ttname"><a href="namespacetvm.html#ab25738e50b37cd07b2d171ca74ba9321">tvm::operator%</a></div><div class="ttdeci">PrimExpr operator%(const PrimExpr &amp;a, const TB &amp;b)</div><div class="ttdef"><b>Definition:</b> op.h:1101</div></div>
 <div class="ttc" id="namespacetvm_html_ae8ecc0382685a855187bede0c97d93e6"><div class="ttname"><a href="namespacetvm.html#ae8ecc0382685a855187bede0c97d93e6">tvm::right_shift</a></div><div class="ttdeci">PrimExpr right_shift(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">right shift operator </div></div>
-<div class="ttc" id="namespacetvm_html_a354b9954ff25dd819a51d856fdd38827"><div class="ttname"><a href="namespacetvm.html#a354b9954ff25dd819a51d856fdd38827">tvm::operator~</a></div><div class="ttdeci">PrimExpr operator~(PrimExpr a)</div><div class="ttdoc">take bitwise negation of two values </div></div>
-<div class="ttc" id="namespacetvm_html_a82be70bd7794abca32473604cbb09569"><div class="ttname"><a href="namespacetvm.html#a82be70bd7794abca32473604cbb09569">tvm::exp</a></div><div class="ttdeci">PrimExpr exp(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:888</div></div>
+<div class="ttc" id="namespacetvm_html_a82be70bd7794abca32473604cbb09569"><div class="ttname"><a href="namespacetvm.html#a82be70bd7794abca32473604cbb09569">tvm::exp</a></div><div class="ttdeci">PrimExpr exp(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:693</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_a052732be4d88e9d67dc2cbe83ba6a310"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#a052732be4d88e9d67dc2cbe83ba6a310">tvm::runtime::DataType::is_handle</a></div><div class="ttdeci">bool is_handle() const</div><div class="ttdef"><b>Definition:</b> data_type.h:103</div></div>
 <div class="ttc" id="classtvm_1_1PrimExpr_html"><div class="ttname"><a href="classtvm_1_1PrimExpr.html">tvm::PrimExpr</a></div><div class="ttdoc">Reference to PrimExprNode. </div><div class="ttdef"><b>Definition:</b> expr.h:112</div></div>
 <div class="ttc" id="namespacetvm_html_acebb0c446b76d5a28c3b1b55f827c86e"><div class="ttname"><a href="namespacetvm.html#acebb0c446b76d5a28c3b1b55f827c86e">tvm::bitwise_and</a></div><div class="ttdeci">PrimExpr bitwise_and(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">take bitwise and of two values </div></div>
 <div class="ttc" id="classtvm_1_1PrimTypeNode_html"><div class="ttname"><a href="classtvm_1_1PrimTypeNode.html">tvm::PrimTypeNode</a></div><div class="ttdoc">Primitive data types used in the low-level IR. </div><div class="ttdef"><b>Definition:</b> type.h:106</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a8de8f843c6eb433b6ddfbf34e24099ef"><div class="ttname"><a href="namespacetvm_1_1tir.html#a8de8f843c6eb433b6ddfbf34e24099ef">tvm::tir::is_no_op</a></div><div class="ttdeci">bool is_no_op(const tir::Stmt &amp;stmt)</div><div class="ttdoc">Check whether stmt is nop. </div><div class="ttdef"><b>Definition:</b> op.h:1091</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a8de8f843c6eb433b6ddfbf34e24099ef"><div class="ttname"><a href="namespacetvm_1_1tir.html#a8de8f843c6eb433b6ddfbf34e24099ef">tvm::tir::is_no_op</a></div><div class="ttdeci">bool is_no_op(const tir::Stmt &amp;stmt)</div><div class="ttdoc">Check whether stmt is nop. </div><div class="ttdef"><b>Definition:</b> op.h:896</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1ObjectRef_html_a2d76fa1fb628ff276a284e61123589c5"><div class="ttname"><a href="classtvm_1_1runtime_1_1ObjectRef.html#a2d76fa1fb628ff276a284e61123589c5">tvm::runtime::ObjectRef::as</a></div><div class="ttdeci">const ObjectType * as() const</div><div class="ttdoc">Try to downcast the internal Object to a raw pointer of a corresponding type. </div><div class="ttdef"><b>Definition:</b> object.h:865</div></div>
 <div class="ttc" id="namespacetvm_html_ac62b239b36ad259a118bb20cb78a01a2"><div class="ttname"><a href="namespacetvm.html#ac62b239b36ad259a118bb20cb78a01a2">tvm::truncdiv</a></div><div class="ttdeci">PrimExpr truncdiv(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute trunc(a / b) </div></div>
-<div class="ttc" id="namespacetvm_html_aa048961a5d19e9f32071c1372809ecbd"><div class="ttname"><a href="namespacetvm.html#aa048961a5d19e9f32071c1372809ecbd">tvm::sigmoid</a></div><div class="ttdeci">PrimExpr sigmoid(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:893</div></div>
-<div class="ttc" id="tir_2op_8h_html_a8fc539385c2bb11740d0a6bef19be7b8"><div class="ttname"><a href="tir_2op_8h.html#a8fc539385c2bb11740d0a6bef19be7b8">TVM_DEFINE_BINOP_CONST_VAL_OVERLOAD_SPANNED</a></div><div class="ttdeci">#define TVM_DEFINE_BINOP_CONST_VAL_OVERLOAD_SPANNED(Name)</div><div class="ttdef"><b>Definition:</b> op.h:1176</div></div>
+<div class="ttc" id="namespacetvm_html_aa048961a5d19e9f32071c1372809ecbd"><div class="ttname"><a href="namespacetvm.html#aa048961a5d19e9f32071c1372809ecbd">tvm::sigmoid</a></div><div class="ttdeci">PrimExpr sigmoid(PrimExpr x, Span span=Span())</div><div class="ttdef"><b>Definition:</b> op.h:698</div></div>
+<div class="ttc" id="tir_2op_8h_html_a8fc539385c2bb11740d0a6bef19be7b8"><div class="ttname"><a href="tir_2op_8h.html#a8fc539385c2bb11740d0a6bef19be7b8">TVM_DEFINE_BINOP_CONST_VAL_OVERLOAD_SPANNED</a></div><div class="ttdeci">#define TVM_DEFINE_BINOP_CONST_VAL_OVERLOAD_SPANNED(Name)</div><div class="ttdef"><b>Definition:</b> op.h:981</div></div>
 <div class="ttc" id="namespacetvm_html_ad4fceb4266c6e7644fa373eacf73359f"><div class="ttname"><a href="namespacetvm.html#ad4fceb4266c6e7644fa373eacf73359f">tvm::left_shift</a></div><div class="ttdeci">PrimExpr left_shift(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">left shift operator </div></div>
-<div class="ttc" id="namespacetvm_html_a236d9aae385e6697874f75e4c8a69f8d"><div class="ttname"><a href="namespacetvm.html#a236d9aae385e6697874f75e4c8a69f8d">tvm::operator|</a></div><div class="ttdeci">PrimExpr operator|(PrimExpr a, PrimExpr b)</div><div class="ttdoc">take bitwise or of two values </div></div>
 <div class="ttc" id="namespacetvm_html_a32a87ae9eacafb2b5b71b28bcc9ef35e"><div class="ttname"><a href="namespacetvm.html#a32a87ae9eacafb2b5b71b28bcc9ef35e">tvm::prod</a></div><div class="ttdeci">PrimExpr prod(PrimExpr source, Array&lt; tir::IterVar &gt; axis, Array&lt; PrimExpr &gt; init={}, Span span=Span())</div><div class="ttdoc">product of of source expression over axis </div></div>
-<div class="ttc" id="namespacetvm_html_af246f441d4ac21b110185b77240b2dcc"><div class="ttname"><a href="namespacetvm.html#af246f441d4ac21b110185b77240b2dcc">tvm::operator+</a></div><div class="ttdeci">PrimExpr operator+(PrimExpr a, PrimExpr b)</div><div class="ttdoc">add operator </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a48bad3db162b334837716bf8e7ba9285"><div class="ttname"><a href="namespacetvm_1_1tir.html#a48bad3db162b334837716bf8e7ba9285">tvm::tir::is_zero</a></div><div class="ttdeci">bool is_zero(const PrimExpr &amp;x)</div><div class="ttdoc">Check whether x is a constant integer 0. </div><div class="ttdef"><b>Definition:</b> op.h:1023</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a48bad3db162b334837716bf8e7ba9285"><div class="ttname"><a href="namespacetvm_1_1tir.html#a48bad3db162b334837716bf8e7ba9285">tvm::tir::is_zero</a></div><div class="ttdeci">bool is_zero(const PrimExpr &amp;x)</div><div class="ttdoc">Check whether x is a constant integer 0. </div><div class="ttdef"><b>Definition:</b> op.h:828</div></div>
 <div class="ttc" id="namespacetvm_html_a41918af1a1dc386388639a9d3ad06c5d"><div class="ttname"><a href="namespacetvm.html#a41918af1a1dc386388639a9d3ad06c5d">tvm::DataType</a></div><div class="ttdeci">runtime::DataType DataType</div><div class="ttdef"><b>Definition:</b> data_type.h:389</div></div>
 <div class="ttc" id="namespacetvm_html_a52fa1dc57423a077eb098960162e7b85"><div class="ttname"><a href="namespacetvm.html#a52fa1dc57423a077eb098960162e7b85">tvm::less</a></div><div class="ttdeci">PrimExpr less(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">less </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_ad1cf4571ee1a22c188c66ee2e6e6c042"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#ad1cf4571ee1a22c188c66ee2e6e6c042">tvm::runtime::DataType::UInt</a></div><div class="ttdeci">static DataType UInt(int bits, int lanes=1)</div><div class="ttdoc">Construct an uint type. </div><div class="ttdef"><b>Definition:</b> data_type.h:161</div></div>
@@ -238,7 +221,6 @@ $(function() {
 <div class="ttc" id="namespacetvm_html_a5f6532d705be039030e980bbbf81b096"><div class="ttname"><a href="namespacetvm.html#a5f6532d705be039030e980bbbf81b096">tvm::pow</a></div><div class="ttdeci">PrimExpr pow(PrimExpr x, PrimExpr y, Span span=Span())</div><div class="ttdoc">Calculate power(x, y) </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_a2fe08e37ac1a1c09b10251313582990d"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#a2fe08e37ac1a1c09b10251313582990d">tvm::runtime::DataType::element_of</a></div><div class="ttdeci">DataType element_of() const</div><div class="ttdoc">Get the scalar version of the type. </div><div class="ttdef"><b>Definition:</b> data_type.h:126</div></div>
 <div class="ttc" id="namespacetvm_html_a4a6399430c10dccd79f64222cda011ed"><div class="ttname"><a href="namespacetvm.html#a4a6399430c10dccd79f64222cda011ed">tvm::isnan</a></div><div class="ttdeci">PrimExpr isnan(PrimExpr x, Span span=Span())</div><div class="ttdoc">Check if x is NaN. </div></div>
-<div class="ttc" id="namespacetvm_html_ad93d00f7b080dc3f905f5c34c170a041"><div class="ttname"><a href="namespacetvm.html#ad93d00f7b080dc3f905f5c34c170a041">tvm::operator&gt;</a></div><div class="ttdeci">PrimExpr operator&gt;(PrimExpr a, PrimExpr b)</div><div class="ttdoc">greater </div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/tir_2op__attr__types_8h_source.html b/docs/reference/api/doxygen/tir_2op__attr__types_8h_source.html
index e176c22ac..aeb0b2050 100644
--- a/docs/reference/api/doxygen/tir_2op__attr__types_8h_source.html
+++ b/docs/reference/api/doxygen/tir_2op__attr__types_8h_source.html
@@ -82,7 +82,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1tir_html_a8f4a86b205145696c0555fd02bd37f46a52c0b756f9ae7d0091ceb863a70e960b"><div class="ttname"><a href="namespacetvm_1_1tir.html#a8f4a86b205145696c0555fd02bd37f46a52c0b756f9ae7d0091ceb863a70e960b">tvm::tir::CallEffectKind::kExprAnnotation</a></div><div class="ttdoc">Function corresponds to an annotation(e.g. likely) and can translate to identity. ...</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_html_add7d0a6b1dd91f0c3c5dd2f4cf64358eaf324873e6195114a186db7f910559b2c"><div class="ttname"><a href="namespacetvm_1_1tir.html#add7d0a6b1dd91f0c3c5dd2f4cf64358eaf324873e6195114a186db7f910559b2c">tvm::tir::kOpaque</a></div><div class="ttdoc">IterVar is opaque,. </div><div class="ttdef"><b>Definition:</b> var.h:220</div></div>
 <div class="ttc" id="packed__func_8h_html"><div class="ttname"><a href="packed__func_8h.html">packed_func.h</a></div><div class="ttdoc">Type-erased function used across TVM API. </div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/tir_2usmp_2transform_8h_source.html b/docs/reference/api/doxygen/tir_2usmp_2transform_8h_source.html
index ee34f2ee7..39d322020 100644
--- a/docs/reference/api/doxygen/tir_2usmp_2transform_8h_source.html
+++ b/docs/reference/api/doxygen/tir_2usmp_2transform_8h_source.html
@@ -67,7 +67,7 @@ $(function() {
 </div><!--header-->
 <div class="contents">
 <a href="tir_2usmp_2transform_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment">  [...]
-<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:369</div></div>
+<div class="ttc" id="classtvm_1_1Bool_html"><div class="ttname"><a href="classtvm_1_1Bool.html">tvm::Bool</a></div><div class="ttdoc">Boolean constant. </div><div class="ttdef"><b>Definition:</b> expr.h:583</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_1_1usmp_1_1transform_html_a1b12a47b959ac6298f1e3df40ed48458"><div class="ttname"><a href="namespacetvm_1_1tir_1_1usmp_1_1transform.html#a1b12a47b959ac6298f1e3df40ed48458">tvm::tir::usmp::transform::AssignPoolInfo</a></div><div class="ttdeci">Pass AssignPoolInfo()</div><div class="ttdoc">Assign PoolInfo objects to tir.allocate nodes depending on the PrimFunc&amp;#39;s target. ...</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_1_1usmp_1_1transform_html_ad1751f300f05f2448d280b98c48b65a1"><div class="ttname"><a href="namespacetvm_1_1tir_1_1usmp_1_1transform.html#ad1751f300f05f2448d280b98c48b65a1">tvm::tir::usmp::transform::CreateAllocatesForIO</a></div><div class="ttdeci">Pass CreateAllocatesForIO()</div><div class="ttdoc">This pass creates Allocate nodes for I/O tensors. </div></div>
diff --git a/docs/reference/api/doxygen/tir_2usmp_2utils_8h_source.html b/docs/reference/api/doxygen/tir_2usmp_2utils_8h_source.html
index 5a8552674..7f021a965 100644
--- a/docs/reference/api/doxygen/tir_2usmp_2utils_8h_source.html
+++ b/docs/reference/api/doxygen/tir_2usmp_2utils_8h_source.html
@@ -136,7 +136,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1tir_1_1usmp_html_ae54e3c895dbf7871be67970f91b16b95af2bbd8203bc7c5c4efd47aa348753504"><div class="ttname"><a href="namespacetvm_1_1tir_1_1usmp.html#ae54e3c895dbf7871be67970f91b16b95af2bbd8203bc7c5c4efd47aa348753504">tvm::tir::usmp::BufferInfoKind::kOutput</a></div></div>
 <div class="ttc" id="structtvm_1_1tir_1_1usmp_1_1AllocatedPoolInfoNode_html_a270341b8174df730ae457c0ab414da32"><div class="ttname"><a href="structtvm_1_1tir_1_1usmp_1_1AllocatedPoolInfoNode.html#a270341b8174df730ae457c0ab414da32">tvm::tir::usmp::AllocatedPoolInfoNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> utils.h:221</div></div>
 <div class="ttc" id="structtvm_1_1tir_1_1usmp_1_1BufferInfoAnalysisNode_html_a99198da52aa426b9cdec9b6ad776b591"><div class="ttname"><a href="structtvm_1_1tir_1_1usmp_1_1BufferInfoAnalysisNode.html#a99198da52aa426b9cdec9b6ad776b591">tvm::tir::usmp::BufferInfoAnalysisNode::SHashReduce</a></div><div class="ttdeci">void SHashReduce(SHashReducer hash_reduce) const</div><div class="ttdef"><b>Definition:</b> utils.h:154</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/topi_2nn_8h_source.html b/docs/reference/api/doxygen/topi_2nn_8h_source.html
index 700d71831..cb7668d80 100644
--- a/docs/reference/api/doxygen/topi_2nn_8h_source.html
+++ b/docs/reference/api/doxygen/topi_2nn_8h_source.html
@@ -69,7 +69,7 @@ $(function() {
 <a href="topi_2nn_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="comment"> * or more co [...]
 <div class="ttc" id="namespacetvm_1_1topi_html_a582bc98a3956894e8e90a3a3da929568"><div class="ttname"><a href="namespacetvm_1_1topi.html#a582bc98a3956894e8e90a3a3da929568">tvm::topi::divide</a></div><div class="ttdeci">tvm::PrimExpr divide(const tvm::PrimExpr &amp;a, const tvm::PrimExpr &amp;b)</div><div class="ttdef"><b>Definition:</b> broadcast.h:239</div></div>
 <div class="ttc" id="namespacetvm_html_ada5ad8338d3144221d8f16380e6c4855"><div class="ttname"><a href="namespacetvm.html#ada5ad8338d3144221d8f16380e6c4855">tvm::indexmod</a></div><div class="ttdeci">PrimExpr indexmod(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute the remainder floor(a / b) where a and b are non-negative. </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="namespacetvm_1_1te_html"><div class="ttname"><a href="namespacetvm_1_1te.html">tvm::te</a></div><div class="ttdoc">Tensor expression language DSL. </div><div class="ttdef"><b>Definition:</b> extracted_task.h:33</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1Var_html"><div class="ttname"><a href="classtvm_1_1tir_1_1Var.html">tvm::tir::Var</a></div><div class="ttdoc">a named variable in TIR </div><div class="ttdef"><b>Definition:</b> var.h:88</div></div>
@@ -79,7 +79,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_html_a29e22aa45900dad3b6f9f705bb1dc688"><div class="ttname"><a href="namespacetvm_1_1topi.html#a29e22aa45900dad3b6f9f705bb1dc688">tvm::topi::kInjective</a></div><div class="ttdeci">constexpr auto kInjective</div><div class="ttdef"><b>Definition:</b> tags.h:33</div></div>
 <div class="ttc" id="reduction_8h_html"><div class="ttname"><a href="reduction_8h.html">reduction.h</a></div><div class="ttdoc">Reduction op constructors. </div></div>
 <div class="ttc" id="classtvm_1_1arith_1_1Analyzer_html_a9b440f852f12ad0a4d8ed5ed97054425"><div class="ttname"><a href="classtvm_1_1arith_1_1Analyzer.html#a9b440f852f12ad0a4d8ed5ed97054425">tvm::arith::Analyzer::Simplify</a></div><div class="ttdeci">PrimExpr Simplify(const PrimExpr &amp;expr, int steps=2)</div><div class="ttdoc">Simplify expr. </div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a6454dd89e85fc29a7e3b8620df90a6f6"><div class="ttname"><a href="namespacetvm_1_1tir.html#a6454dd89e85fc29a7e3b8620df90a6f6">tvm::tir::foldl</a></div><div class="ttdeci">PrimExpr foldl(FReduce freduce, PrimExpr init_value, const Array&lt; PrimExpr &gt; &amp;values, Span span=Span())</div><div class="ttdoc">Left fold. </div><div class="ttdef"><b>Definition:</b> op.h:1146</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a6454dd89e85fc29a7e3b8620df90a6f6"><div class="ttname"><a href="namespacetvm_1_1tir.html#a6454dd89e85fc29a7e3b8620df90a6f6">tvm::tir::foldl</a></div><div class="ttdeci">PrimExpr foldl(FReduce freduce, PrimExpr init_value, const Array&lt; PrimExpr &gt; &amp;values, Span span=Span())</div><div class="ttdoc">Left fold. </div><div class="ttdef"><b>Definition:</b> op.h:951</div></div>
 <div class="ttc" id="tir_2op_8h_html"><div class="ttname"><a href="tir_2op_8h.html">op.h</a></div><div class="ttdoc">Common operators defined for Expr. </div></div>
 <div class="ttc" id="namespacetvm_html_a4bfb789a86d95f6241b50fd26f269c28"><div class="ttname"><a href="namespacetvm.html#a4bfb789a86d95f6241b50fd26f269c28">tvm::cast</a></div><div class="ttdeci">PrimExpr cast(const DataType &amp;t, PrimExpr value, Span span=Span())</div><div class="ttdoc">cast value to type. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_aeb1547800d4b7625326a176ca1dec6e0"><div class="ttname"><a href="namespacetvm_1_1topi.html#aeb1547800d4b7625326a176ca1dec6e0">tvm::topi::nll_loss</a></div><div class="ttdeci">Tensor nll_loss(const Tensor &amp;predictions, const Tensor &amp;targets, const Tensor &amp;weights, std::string reduction=&quot;mean&quot;, int ignore_index=-100, const std::string name=&quot;nll_loss&quot;, const std::string tag=kBroadcast)</div><div class="ttdoc">Nega [...]
@@ -91,8 +91,8 @@ $(function() {
 <div class="ttc" id="constant__utils_8h_html"><div class="ttname"><a href="constant__utils_8h.html">constant_utils.h</a></div><div class="ttdoc">Utility functions for handling constants in TVM expressions. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a9094c3c432410ef2675444ecf8e06bfe"><div class="ttname"><a href="namespacetvm_1_1topi.html#a9094c3c432410ef2675444ecf8e06bfe">tvm::topi::kDepthwiseConv2dNHWC</a></div><div class="ttdeci">constexpr auto kDepthwiseConv2dNHWC</div><div class="ttdef"><b>Definition:</b> tags.h:41</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a13aaf23f0ab77f1ed4a7d4b7816bf210"><div class="ttname"><a href="namespacetvm_1_1topi.html#a13aaf23f0ab77f1ed4a7d4b7816bf210">tvm::topi::kBroadcast</a></div><div class="ttdeci">constexpr auto kBroadcast</div><div class="ttdef"><b>Definition:</b> tags.h:36</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_a8dd84303a9864b5b366835fa628a7824"><div class="ttname"><a href="namespacetvm_1_1tir.html#a8dd84303a9864b5b366835fa628a7824">tvm::tir::const_true</a></div><div class="ttdeci">PrimExpr const_true(int lanes=1, Span span=Span())</div><div class="ttdoc">Make a constant true expression. </div><div class="ttdef"><b>Definition:</b> op.h:967</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a8dd84303a9864b5b366835fa628a7824"><div class="ttname"><a href="namespacetvm_1_1tir.html#a8dd84303a9864b5b366835fa628a7824">tvm::tir::const_true</a></div><div class="ttdeci">PrimExpr const_true(int lanes=1, Span span=Span())</div><div class="ttdoc">Make a constant true expression. </div><div class="ttdef"><b>Definition:</b> op.h:772</div></div>
 <div class="ttc" id="classtvm_1_1Span_html"><div class="ttname"><a href="classtvm_1_1Span.html">tvm::Span</a></div><div class="ttdef"><b>Definition:</b> span.h:115</div></div>
 <div class="ttc" id="namespacetvm_html_a16f9cd9219b505e2cc05c5a7558ac61f"><div class="ttname"><a href="namespacetvm.html#a16f9cd9219b505e2cc05c5a7558ac61f">tvm::div</a></div><div class="ttdeci">PrimExpr div(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute division in C semantics. </div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aed6387e67d18b9d5ad18f510fd600a25"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aed6387e67d18b9d5ad18f510fd600a25">tvm::runtime::Array::size</a></div><div class="ttdeci">size_t size() const</div><div class="ttdef"><b>Definition:</b> array.h:399</div></div>
@@ -125,7 +125,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_html_a315c34bbe2bf1be4c778acae08c906fc"><div class="ttname"><a href="namespacetvm_1_1topi.html#a315c34bbe2bf1be4c778acae08c906fc">tvm::topi::prelu</a></div><div class="ttdeci">tvm::te::Tensor prelu(const tvm::te::Tensor &amp;x, const tvm::te::Tensor &amp;slope, const int axis=1, std::string name=&quot;T_prelu&quot;, std::string tag=kBroadcast)</div><div class="ttdoc">Creates an operation that performs a parametric rectified linear unit. </div><di [...]
 <div class="ttc" id="classtvm_1_1arith_1_1Analyzer_html"><div class="ttname"><a href="classtvm_1_1arith_1_1Analyzer.html">tvm::arith::Analyzer</a></div><div class="ttdoc">Analyzer that contains bunch of sub-analyzers. </div><div class="ttdef"><b>Definition:</b> analyzer.h:423</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_ab45f13dd70d982d9f977c79b6f7fac98"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#ab45f13dd70d982d9f977c79b6f7fac98">tvm::runtime::DataType::Int</a></div><div class="ttdeci">static DataType Int(int bits, int lanes=1)</div><div class="ttdoc">Construct an int type. </div><div class="ttdef"><b>Definition:</b> data_type.h:154</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a4c2a0e74a45381e899f9ff788365eff0"><div class="ttname"><a href="namespacetvm_1_1topi.html#a4c2a0e74a45381e899f9ff788365eff0">tvm::topi::group_conv2d_ngchw</a></div><div class="ttdeci">tvm::te::Tensor group_conv2d_ngchw(const tvm::te::Tensor &amp;I, const tvm::te::Tensor &amp;W, int pad_h=0, int pad_w=0, int stride_h=1, int stride_w=1, std::string name=&quot;T_group_conv2d_ngchw&quot;, std::string tag=kGroupConv2d)</div><div class="ttdoc">Cre [...]
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
diff --git a/docs/reference/api/doxygen/topi_2transform_8h_source.html b/docs/reference/api/doxygen/topi_2transform_8h_source.html
index af5237708..4ffb02a25 100644
--- a/docs/reference/api/doxygen/topi_2transform_8h_source.html
+++ b/docs/reference/api/doxygen/topi_2transform_8h_source.html
@@ -74,7 +74,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_html_a877e6fdffb6b6c051c29602ec6fe995c"><div class="ttname"><a href="namespacetvm_1_1topi.html#a877e6fdffb6b6c051c29602ec6fe995c">tvm::topi::sparse_to_dense</a></div><div class="ttdeci">Tensor sparse_to_dense(const Tensor &amp;sparse_indices, const Array&lt; PrimExpr &gt; &amp;output_shape, const Tensor &amp;sparse_values, const PrimExpr &amp;default_value, const std::string name=&quot;T_sparse_to_dense&quot;, const std::string tag=kInjective)</d [...]
 <div class="ttc" id="namespacetvm_html_ada5ad8338d3144221d8f16380e6c4855"><div class="ttname"><a href="namespacetvm.html#ada5ad8338d3144221d8f16380e6c4855">tvm::indexmod</a></div><div class="ttdeci">PrimExpr indexmod(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute the remainder floor(a / b) where a and b are non-negative. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_aff75b2e533b654909ca1bd9485ef4e6b"><div class="ttname"><a href="namespacetvm_1_1topi.html#aff75b2e533b654909ca1bd9485ef4e6b">tvm::topi::StridedSliceOutputShape</a></div><div class="ttdeci">Array&lt; PrimExpr &gt; StridedSliceOutputShape(const Array&lt; PrimExpr &gt; &amp;ishape, const Array&lt; Integer &gt; &amp;begin, const Array&lt; Integer &gt; &amp;end, const Array&lt; Integer &gt; &amp;strides, const Array&lt; Integer &gt; &amp;axes, co [...]
-<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:1130</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_a1a071208bbbab6b220cf46f5cdccdd86"><div class="ttname"><a href="namespacetvm_1_1tir.html#a1a071208bbbab6b220cf46f5cdccdd86">tvm::tir::make_const</a></div><div class="ttdeci">PrimExpr make_const(DataType t, ValueType value, Span span=Span())</div><div class="ttdoc">Make a const value with certain data type. </div><div class="ttdef"><b>Definition:</b> op.h:935</div></div>
 <div class="ttc" id="classtvm_1_1Range_html_a91e7301ca1d135ca5f8ed199efbb9818"><div class="ttname"><a href="classtvm_1_1Range.html#a91e7301ca1d135ca5f8ed199efbb9818">tvm::Range::FromMinExtent</a></div><div class="ttdeci">static Range FromMinExtent(PrimExpr min, PrimExpr extent, Span span=Span())</div><div class="ttdoc">construct a new range with min and extent The corresponding constructor is removed, because that is counter convention of tradition meaning of range(begin, end) </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a3230e1735957c2045c89cf190e0f8c34"><div class="ttname"><a href="namespacetvm_1_1topi.html#a3230e1735957c2045c89cf190e0f8c34">tvm::topi::sliding_window</a></div><div class="ttdeci">Tensor sliding_window(const Tensor &amp;x, int axis, Array&lt; Integer &gt; window_shape, Array&lt; Integer &gt; strides, std::string name=&quot;T_sliding_window&quot;, std::string tag=&quot;&quot;)</div><div class="ttdoc">Creates an operation to slide a window ove [...]
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
@@ -103,7 +103,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_html_a4bfb789a86d95f6241b50fd26f269c28"><div class="ttname"><a href="namespacetvm.html#a4bfb789a86d95f6241b50fd26f269c28">tvm::cast</a></div><div class="ttdeci">PrimExpr cast(const DataType &amp;t, PrimExpr value, Span span=Span())</div><div class="ttdoc">cast value to type. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a845e38c0f34017d45ec318935b6ddf17"><div class="ttname"><a href="namespacetvm_1_1topi.html#a845e38c0f34017d45ec318935b6ddf17">tvm::topi::squeeze</a></div><div class="ttdeci">Tensor squeeze(const Tensor &amp;x, Array&lt; Integer &gt; axis, bool atleast1d=false, std::string name=&quot;T_squeeze&quot;, std::string tag=kInjective)</div><div class="ttdoc">Remove size 1 dimensions from the shape of a tensor. The removed dimensions must have a const [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_ab5db2ee9a8be71931324dac552be24c4"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#ab5db2ee9a8be71931324dac552be24c4">tvm::runtime::Array::Set</a></div><div class="ttdeci">void Set(int64_t i, T value)</div><div class="ttdoc">set i-th element of the array. </div><div class="ttdef"><b>Definition:</b> array.h:567</div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:275</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html"><div class="ttname"><a href="classtvm_1_1IntImmNode.html">tvm::IntImmNode</a></div><div class="ttdoc">Constant integer literals in the program. </div><div class="ttdef"><b>Definition:</b> expr.h:489</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aa026b914ee05f81b6c20130b8905f257"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aa026b914ee05f81b6c20130b8905f257">tvm::runtime::Array::push_back</a></div><div class="ttdeci">void push_back(const T &amp;item)</div><div class="ttdoc">push a new item to the back of the list </div><div class="ttdef"><b>Definition:</b> array.h:436</div></div>
 <div class="ttc" id="index__map_8h_html"><div class="ttname"><a href="index__map_8h.html">index_map.h</a></div><div class="ttdoc">Defines a remapping of buffer indices. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a7ddbd03d0d29a05618a1ef42f717ec9f"><div class="ttname"><a href="namespacetvm_1_1topi.html#a7ddbd03d0d29a05618a1ef42f717ec9f">tvm::topi::expand_dims</a></div><div class="ttdeci">Tensor expand_dims(const Tensor &amp;x, int axis, int num_newaxis=1, std::string name=&quot;T_expand_dims&quot;, std::string tag=kBroadcast)</div><div class="ttdoc">Creates an operation to insert new dimensions of length 1. </div><div class="ttdef"><b>Definition:</b>  [...]
@@ -111,7 +111,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_1_1topi_html_abee7c35e8c15e2e61afe35852dfcb252"><div class="ttname"><a href="namespacetvm_1_1topi.html#abee7c35e8c15e2e61afe35852dfcb252">tvm::topi::sum</a></div><div class="ttdeci">Tensor sum(const Tensor &amp;data, const Array&lt; Integer &gt; &amp;axis, bool keepdims=false, bool atleast1d=false)</div><div class="ttdoc">Creates an operation that sums array elements over a given axis. </div><div class="ttdef"><b>Definition:</b> reduction.h:326</div></div>
 <div class="ttc" id="constant__utils_8h_html"><div class="ttname"><a href="constant__utils_8h.html">constant_utils.h</a></div><div class="ttdoc">Utility functions for handling constants in TVM expressions. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a13aaf23f0ab77f1ed4a7d4b7816bf210"><div class="ttname"><a href="namespacetvm_1_1topi.html#a13aaf23f0ab77f1ed4a7d4b7816bf210">tvm::topi::kBroadcast</a></div><div class="ttdeci">constexpr auto kBroadcast</div><div class="ttdef"><b>Definition:</b> tags.h:36</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a1db52a53bfb38341ef89b375562731c6"><div class="ttname"><a href="namespacetvm_1_1topi.html#a1db52a53bfb38341ef89b375562731c6">tvm::topi::arange</a></div><div class="ttdeci">Tensor arange(const PrimExpr &amp;start, const PrimExpr &amp;stop, const PrimExpr &amp;step, DataType dtype, std::string name=&quot;T_arange&quot;, std::string tag=kInjective)</div><div class="ttdef"><b>Definition:</b> transform.h:1539</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aed6387e67d18b9d5ad18f510fd600a25"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aed6387e67d18b9d5ad18f510fd600a25">tvm::runtime::Array::size</a></div><div class="ttdeci">size_t size() const</div><div class="ttdef"><b>Definition:</b> array.h:399</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html">tvm::runtime::DataType</a></div><div class="ttdoc">Runtime primitive data type. </div><div class="ttdef"><b>Definition:</b> data_type.h:41</div></div>
@@ -125,11 +125,11 @@ $(function() {
 <div class="ttc" id="namespacetvm_html_a8f30aa0685ca52f846843e76a1ad1dc7"><div class="ttname"><a href="namespacetvm.html#a8f30aa0685ca52f846843e76a1ad1dc7">tvm::indexdiv</a></div><div class="ttdeci">PrimExpr indexdiv(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">compute floor(a / b) where a and b are non-negative. </div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a6f3c749dadc49aa100626ef85a04a66b"><div class="ttname"><a href="namespacetvm_1_1topi.html#a6f3c749dadc49aa100626ef85a04a66b">tvm::topi::concatenate</a></div><div class="ttdeci">Tensor concatenate(const Array&lt; Tensor &gt; &amp;inputs, int axis=0, std::string name=&quot;T_concat&quot;, std::string tag=kInjective)</div><div class="ttdoc">Join a sequence of tensors along an existing axis. </div><div class="ttdef"><b>Definition:</b> transform. [...]
 <div class="ttc" id="namespacetvm_1_1topi_html_aa1468cc7d8f47a44800fa38d6377ae67"><div class="ttname"><a href="namespacetvm_1_1topi.html#aa1468cc7d8f47a44800fa38d6377ae67">tvm::topi::take</a></div><div class="ttdeci">Tensor take(const Tensor &amp;a, const Tensor &amp;indices, int batch_dims, std::string mode=&quot;clip&quot;, std::string name=&quot;T_take&quot;, std::string tag=kInjective)</div><div class="ttdoc">Take elements from an flattened input array when axis is None. </div><div c [...]
-<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:304</div></div>
+<div class="ttc" id="classtvm_1_1IntImm_html"><div class="ttname"><a href="classtvm_1_1IntImm.html">tvm::IntImm</a></div><div class="ttdoc">Managed reference class to IntImmNode. </div><div class="ttdef"><b>Definition:</b> expr.h:518</div></div>
 <div class="ttc" id="namespacetvm_html_a0df5ca82d2c566f628ebb2f1e84a3fcb"><div class="ttname"><a href="namespacetvm.html#a0df5ca82d2c566f628ebb2f1e84a3fcb">tvm::max</a></div><div class="ttdeci">PrimExpr max(PrimExpr a, PrimExpr b, Span span=Span())</div><div class="ttdoc">take maximum of two values </div></div>
-<div class="ttc" id="classtvm_1_1IntImmNode_html_a81f4c116ffb5931fdd64639eacad415d"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#a81f4c116ffb5931fdd64639eacad415d">tvm::IntImmNode::value</a></div><div class="ttdeci">int64_t value</div><div class="ttdoc">the Internal value. </div><div class="ttdef"><b>Definition:</b> expr.h:278</div></div>
+<div class="ttc" id="classtvm_1_1IntImmNode_html_a81f4c116ffb5931fdd64639eacad415d"><div class="ttname"><a href="classtvm_1_1IntImmNode.html#a81f4c116ffb5931fdd64639eacad415d">tvm::IntImmNode::value</a></div><div class="ttdeci">int64_t value</div><div class="ttdoc">the Internal value. </div><div class="ttdef"><b>Definition:</b> expr.h:492</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1String_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1String.html">tvm::runtime::String</a></div><div class="ttdoc">Reference to string objects. </div><div class="ttdef"><b>Definition:</b> string.h:124</div></div>
-<div class="ttc" id="namespacetvm_1_1tir_html_aed3f57cf8d1c3546f075701898c5b70f"><div class="ttname"><a href="namespacetvm_1_1tir.html#aed3f57cf8d1c3546f075701898c5b70f">tvm::tir::make_zero</a></div><div class="ttdeci">PrimExpr make_zero(DataType t, Span span=Span())</div><div class="ttdoc">Make a const zero expr. </div><div class="ttdef"><b>Definition:</b> op.h:1138</div></div>
+<div class="ttc" id="namespacetvm_1_1tir_html_aed3f57cf8d1c3546f075701898c5b70f"><div class="ttname"><a href="namespacetvm_1_1tir.html#aed3f57cf8d1c3546f075701898c5b70f">tvm::tir::make_zero</a></div><div class="ttdeci">PrimExpr make_zero(DataType t, Span span=Span())</div><div class="ttdoc">Make a const zero expr. </div><div class="ttdef"><b>Definition:</b> op.h:943</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_af30c02f3a3f37c7963b3af60fb9c72a1"><div class="ttname"><a href="namespacetvm_1_1topi.html#af30c02f3a3f37c7963b3af60fb9c72a1">tvm::topi::shape</a></div><div class="ttdeci">Tensor shape(const Tensor &amp;src, DataType dtype, const std::string name=&quot;T_shape&quot;, const std::string tag=kInjective)</div><div class="ttdoc">Get the shape of input tensor. </div><div class="ttdef"><b>Definition:</b> transform.h:1758</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_a453f9f5a35a9852a086ce0228ee22164"><div class="ttname"><a href="namespacetvm_1_1topi.html#a453f9f5a35a9852a086ce0228ee22164">tvm::topi::meshgrid</a></div><div class="ttdeci">Array&lt; Tensor &gt; meshgrid(const Array&lt; Tensor &gt; &amp;inputs, const std::string &amp;indexing, std::string name=&quot;T_meshgrid&quot;, std::string tag=kInjective)</div><div class="ttdoc">Produce grids by expanding input over dimensions defined by other inputs. [...]
 <div class="ttc" id="namespacetvm_1_1te_html_aae384e9b73c2271905486e4a74b69265"><div class="ttname"><a href="namespacetvm_1_1te.html#aae384e9b73c2271905486e4a74b69265">tvm::te::reduce_axis</a></div><div class="ttdeci">IterVar reduce_axis(Range dom, std::string name=&quot;rv&quot;)</div><div class="ttdoc">Create a new IterVar for reduction operations. </div></div>
@@ -167,7 +167,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1arith_1_1Analyzer_html"><div class="ttname"><a href="classtvm_1_1arith_1_1Analyzer.html">tvm::arith::Analyzer</a></div><div class="ttdoc">Analyzer that contains bunch of sub-analyzers. </div><div class="ttdef"><b>Definition:</b> analyzer.h:423</div></div>
 <div class="ttc" id="namespacetvm_1_1topi_html_adae7dcb7e951109ba72192202d182994"><div class="ttname"><a href="namespacetvm_1_1topi.html#adae7dcb7e951109ba72192202d182994">tvm::topi::matmul</a></div><div class="ttdeci">tvm::te::Tensor matmul(const tvm::te::Tensor &amp;A, const tvm::te::Tensor &amp;B, bool trans_a=false, bool trans_b=false, std::string name=&quot;T_matmul&quot;, std::string tag=kMatMul)</div><div class="ttdoc">Creates an operation that calculates a matrix multiplication ( [...]
 <div class="ttc" id="classtvm_1_1runtime_1_1DataType_html_ab45f13dd70d982d9f977c79b6f7fac98"><div class="ttname"><a href="classtvm_1_1runtime_1_1DataType.html#ab45f13dd70d982d9f977c79b6f7fac98">tvm::runtime::DataType::Int</a></div><div class="ttdeci">static DataType Int(int bits, int lanes=1)</div><div class="ttdoc">Construct an int type. </div><div class="ttdef"><b>Definition:</b> data_type.h:154</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
diff --git a/docs/reference/api/doxygen/topi_2utils_8h_source.html b/docs/reference/api/doxygen/topi_2utils_8h_source.html
index 3f3ccc749..17858fe11 100644
--- a/docs/reference/api/doxygen/topi_2utils_8h_source.html
+++ b/docs/reference/api/doxygen/topi_2utils_8h_source.html
@@ -70,7 +70,7 @@ $(function() {
 <div class="ttc" id="namespacetvm_html"><div class="ttname"><a href="namespacetvm.html">tvm</a></div><div class="ttdoc">runtime implementation for LibTorch/TorchScript. </div><div class="ttdef"><b>Definition:</b> analyzer.h:36</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html_aa026b914ee05f81b6c20130b8905f257"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html#aa026b914ee05f81b6c20130b8905f257">tvm::runtime::Array::push_back</a></div><div class="ttdeci">void push_back(const T &amp;item)</div><div class="ttdoc">push a new item to the back of the list </div><div class="ttdef"><b>Definition:</b> array.h:436</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1Array_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array</a></div><div class="ttdoc">Array, container representing a contiguous sequence of ObjectRefs. </div><div class="ttdef"><b>Definition:</b> array.h:270</div></div>
-<div class="ttc" id="namespacetvm_1_1runtime_html"><div class="ttname"><a href="namespacetvm_1_1runtime.html">tvm::runtime</a></div><div class="ttdef"><b>Definition:</b> expr.h:544</div></div>
+<div class="ttc" id="namespacetvm_1_1runtime_html"><div class="ttname"><a href="namespacetvm_1_1runtime.html">tvm::runtime</a></div><div class="ttdef"><b>Definition:</b> expr.h:758</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMPODValue___html_aefca71073146f4be36d6a4a0de33d6e0"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMPODValue__.html#aefca71073146f4be36d6a4a0de33d6e0">tvm::runtime::TVMPODValue_::type_code</a></div><div class="ttdeci">int type_code() const</div><div class="ttdef"><b>Definition:</b> packed_func.h:610</div></div>
 <div class="ttc" id="classtvm_1_1runtime_1_1TVMArgValue_html"><div class="ttname"><a href="classtvm_1_1runtime_1_1TVMArgValue.html">tvm::runtime::TVMArgValue</a></div><div class="ttdoc">A single argument value to PackedFunc. Containing both type_code and TVMValue. </div><div class="ttdef"><b>Definition:</b> packed_func.h:646</div></div>
 <div class="ttc" id="packed__func_8h_html"><div class="ttname"><a href="packed__func_8h.html">packed_func.h</a></div><div class="ttdoc">Type-erased function used across TVM API. </div></div>
diff --git a/docs/reference/api/doxygen/transform__step_8h_source.html b/docs/reference/api/doxygen/transform__step_8h_source.html
index 078c7c38d..f3a6dc049 100644
--- a/docs/reference/api/doxygen/transform__step_8h_source.html
+++ b/docs/reference/api/doxygen/transform__step_8h_source.html
@@ -112,7 +112,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStepNode_html_a9967fa04ce3db64cac7bf636b994fc0a"><div class="ttname"><a href="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStepNode.html#a9967fa04ce3db64cac7bf636b994fc0a">tvm::auto_scheduler::FollowFusedSplitStepNode::level</a></div><div class="ttdeci">int level</div><div class="ttdoc">Use the length in this split level. </div><div class="ttdef"><b>Definition:</b> transform_step.h:664</div></div>
 <div class="ttc" id="classtvm_1_1auto__scheduler_1_1FollowSplitStepNode_html"><div class="ttname"><a href="classtvm_1_1auto__scheduler_1_1FollowSplitStepNode.html">tvm::auto_scheduler::FollowSplitStepNode</a></div><div class="ttdoc">Similar to SplitStepNode, but uses split factors from another step (i.e. Follow another split step) ...</div><div class="ttdef"><b>Definition:</b> transform_step.h:578</div></div>
 <div class="ttc" id="classtvm_1_1auto__scheduler_1_1CacheWriteStepNode_html_ae296d4462c2def1263a95a4c57ce9514"><div class="ttname"><a href="classtvm_1_1auto__scheduler_1_1CacheWriteStepNode.html#ae296d4462c2def1263a95a4c57ce9514">tvm::auto_scheduler::CacheWriteStepNode::scope_name</a></div><div class="ttdeci">String scope_name</div><div class="ttdoc">The scope name of the newly added compute stage. (e.g. local, shared, global) </div><div class="ttdef"><b>Definition:</b> transform_step.h: [...]
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStepNode_html"><div class="ttname"><a href="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStepNode.html">tvm::auto_scheduler::FollowFusedSplitStepNode</a></div><div class="ttdoc">Similar to FollowSplitStep, but uses split factors from multiple steps. </div><div class="ttdef"><b>Definition:</b> transform_step.h:657</div></div>
 <div class="ttc" id="classtvm_1_1auto__scheduler_1_1IteratorNode_html"><div class="ttname"><a href="classtvm_1_1auto__scheduler_1_1IteratorNode.html">tvm::auto_scheduler::IteratorNode</a></div><div class="ttdoc">An iterator of a for-loop Similar to tvm::IterVar in include/tvm/tir/expr.h </div><div class="ttdef"><b>Definition:</b> transform_step.h:116</div></div>
 <div class="ttc" id="namespacetvm_1_1auto__scheduler_html_ad81bc395fc88957fbd33bf041adbe0ec"><div class="ttname"><a href="namespacetvm_1_1auto__scheduler.html#ad81bc395fc88957fbd33bf041adbe0ec">tvm::auto_scheduler::IteratorAnnotation</a></div><div class="ttdeci">IteratorAnnotation</div><div class="ttdoc">The type of an iterator&amp;#39;s annotation. </div><div class="ttdef"><b>Definition:</b> transform_step.h:80</div></div>
diff --git a/docs/reference/api/doxygen/var_8h_source.html b/docs/reference/api/doxygen/var_8h_source.html
index 7b627e51d..749b95ed7 100644
--- a/docs/reference/api/doxygen/var_8h_source.html
+++ b/docs/reference/api/doxygen/var_8h_source.html
@@ -95,7 +95,7 @@ $(function() {
 <div class="ttc" id="classtvm_1_1tir_1_1IterVarNode_html"><div class="ttname"><a href="classtvm_1_1tir_1_1IterVarNode.html">tvm::tir::IterVarNode</a></div><div class="ttdoc">An iteration variable representing an iteration over a one dimensional interval. </div><div class="ttdef"><b>Definition:</b> var.h:247</div></div>
 <div class="ttc" id="namespacetvm_1_1tir_html_add7d0a6b1dd91f0c3c5dd2f4cf64358e"><div class="ttname"><a href="namespacetvm_1_1tir.html#add7d0a6b1dd91f0c3c5dd2f4cf64358e">tvm::tir::IterVarType</a></div><div class="ttdeci">IterVarType</div><div class="ttdoc">Type of iteration variable. Each IterVar have a specific type. </div><div class="ttdef"><b>Definition:</b> var.h:178</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1SizeVar_html_ac470249315d9e395ad581d35dd5dcb05"><div class="ttname"><a href="classtvm_1_1tir_1_1SizeVar.html#ac470249315d9e395ad581d35dd5dcb05">tvm::tir::SizeVar::SizeVar</a></div><div class="ttdeci">SizeVar(ObjectPtr&lt; Object &gt; n)</div><div class="ttdef"><b>Definition:</b> var.h:146</div></div>
-<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:497</div></div>
+<div class="ttc" id="classtvm_1_1Range_html"><div class="ttname"><a href="classtvm_1_1Range.html">tvm::Range</a></div><div class="ttdoc">Range constainer. </div><div class="ttdef"><b>Definition:</b> expr.h:711</div></div>
 <div class="ttc" id="classtvm_1_1Span_html"><div class="ttname"><a href="classtvm_1_1Span.html">tvm::Span</a></div><div class="ttdef"><b>Definition:</b> span.h:115</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1IterVarNode_html_adec9071918d696818dc32a6a00e02912"><div class="ttname"><a href="classtvm_1_1tir_1_1IterVarNode.html#adec9071918d696818dc32a6a00e02912">tvm::tir::IterVarNode::VisitAttrs</a></div><div class="ttdeci">void VisitAttrs(AttrVisitor *v)</div><div class="ttdef"><b>Definition:</b> var.h:269</div></div>
 <div class="ttc" id="classtvm_1_1tir_1_1IterVarNode_html_aa4d51a49e06cf6941988c317c53c1b8c"><div class="ttname"><a href="classtvm_1_1tir_1_1IterVarNode.html#aa4d51a49e06cf6941988c317c53c1b8c">tvm::tir::IterVarNode::span</a></div><div class="ttdeci">Span span</div><div class="ttdoc">Span that points to the original source code. Reserved debug information. </div><div class="ttdef"><b>Definition:</b> var.h:267</div></div>
diff --git a/docs/reference/api/doxygen/virtual__device_8h_source.html b/docs/reference/api/doxygen/virtual__device_8h_source.html
index cd95984d0..511a32e77 100644
--- a/docs/reference/api/doxygen/virtual__device_8h_source.html
+++ b/docs/reference/api/doxygen/virtual__device_8h_source.html
@@ -100,7 +100,7 @@ $(function() {
 <div class="ttc" id="object_8h_html_a782d0de62fbf75736e29c1e79c22c7f1"><div class="ttname"><a href="object_8h.html#a782d0de62fbf75736e29c1e79c22c7f1">TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS</a></div><div class="ttdeci">#define TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(TypeName, ParentType, ObjectName)</div><div class="ttdef"><b>Definition:</b> object.h:728</div></div>
 <div class="ttc" id="classtvm_1_1VirtualDeviceNode_html_acbae67e35fb5348e30101ae23073ad52"><div class="ttname"><a href="classtvm_1_1VirtualDeviceNode.html#acbae67e35fb5348e30101ae23073ad52">tvm::VirtualDeviceNode::IsFullyUnconstrained</a></div><div class="ttdeci">bool IsFullyUnconstrained() const</div><div class="ttdoc">Returns true if virtual device is &amp;#39;fully unconstrained&amp;#39;, ie no target/device type, device id or memory scope is specified. </div><div class="ttdef"><b>Def [...]
 <div class="ttc" id="namespacetvm_html_ab3c85920678b8ba5d925d386b66c0261"><div class="ttname"><a href="namespacetvm.html#ab3c85920678b8ba5d925d386b66c0261">tvm::kInvalidDeviceType</a></div><div class="ttdeci">constexpr DLDeviceType kInvalidDeviceType</div><div class="ttdef"><b>Definition:</b> ndarray.h:51</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 <div class="ttc" id="classtvm_1_1VirtualDeviceNode_html_ac3324e6e3ddb8938f7e98c9312b44c64"><div class="ttname"><a href="classtvm_1_1VirtualDeviceNode.html#ac3324e6e3ddb8938f7e98c9312b44c64">tvm::VirtualDeviceNode::ToDevice</a></div><div class="ttdeci">Device ToDevice() const</div><div class="ttdoc">Returns the (virtual) Device implied by this VirtualDevice. Both the device_type and virtual_device_m...</div><div class="ttdef"><b>Definition:</b> virtual_device.h:231</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
diff --git a/docs/reference/api/doxygen/vision_8h_source.html b/docs/reference/api/doxygen/vision_8h_source.html
index 766f809f0..4d6be2c21 100644
--- a/docs/reference/api/doxygen/vision_8h_source.html
+++ b/docs/reference/api/doxygen/vision_8h_source.html
@@ -126,7 +126,7 @@ $(function() {
 <div class="ttc" id="structtvm_1_1relay_1_1YoloReorgAttrs_html"><div class="ttname"><a href="structtvm_1_1relay_1_1YoloReorgAttrs.html">tvm::relay::YoloReorgAttrs</a></div><div class="ttdoc">Attributes used in yolo reorg operators. </div><div class="ttdef"><b>Definition:</b> vision.h:180</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1NonMaximumSuppressionAttrs_html_ad9e2e281399a290410757e948784b93a"><div class="ttname"><a href="structtvm_1_1relay_1_1NonMaximumSuppressionAttrs.html#ad9e2e281399a290410757e948784b93a">tvm::relay::NonMaximumSuppressionAttrs::top_k</a></div><div class="ttdeci">int top_k</div><div class="ttdef"><b>Definition:</b> vision.h:90</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1MultiBoxPriorAttrs_html_a9f31d593bd3f3706e2adefea5982295b"><div class="ttname"><a href="structtvm_1_1relay_1_1MultiBoxPriorAttrs.html#a9f31d593bd3f3706e2adefea5982295b">tvm::relay::MultiBoxPriorAttrs::TVM_DECLARE_ATTRS</a></div><div class="ttdeci">TVM_DECLARE_ATTRS(MultiBoxPriorAttrs, &quot;relay.attrs.MultiBoxPriorAttrs&quot;)</div><div class="ttdef"><b>Definition:</b> vision.h:43</div></div>
-<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:404</div></div>
+<div class="ttc" id="classtvm_1_1Integer_html"><div class="ttname"><a href="classtvm_1_1Integer.html">tvm::Integer</a></div><div class="ttdoc">Container of constant int that adds more constructors. </div><div class="ttdef"><b>Definition:</b> expr.h:618</div></div>
 <div class="ttc" id="structtvm_1_1relay_1_1ROIPoolAttrs_html_ac77f024584d4e31435033d0a150bbde4"><div class="ttname"><a href="structtvm_1_1relay_1_1ROIPoolAttrs.html#ac77f024584d4e31435033d0a150bbde4">tvm::relay::ROIPoolAttrs::layout</a></div><div class="ttdeci">std::string layout</div><div class="ttdef"><b>Definition:</b> vision.h:163</div></div>
 </div><!-- fragment --></div><!-- contents -->
 <!-- start footer part -->
diff --git a/docs/reference/api/python/auto_scheduler.html b/docs/reference/api/python/auto_scheduler.html
index fc1636191..3871c6469 100644
--- a/docs/reference/api/python/auto_scheduler.html
+++ b/docs/reference/api/python/auto_scheduler.html
@@ -1597,7 +1597,7 @@ history states as starting point to perform Evolutionary Search).</p></li>
 
 <dl class="py class">
 <dt class="sig sig-object py" id="tvm.auto_scheduler.SketchPolicy">
-<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">tvm.auto_scheduler.</span></span><span class="sig-name descname"><span class="pre">SketchPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">program_cost_model</span></span><span class="o"><span class="pre">=</span></span><span class="defau [...]
+<em class="property"><span class="pre">class</span> </em><span class="sig-prename descclassname"><span class="pre">tvm.auto_scheduler.</span></span><span class="sig-name descname"><span class="pre">SketchPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">program_cost_model</span></span><span class="o"><span class="pre">=</span></span><span class="defau [...]
 <dd><p>The search policy that searches in a hierarchical search space defined by sketches.
 The policy randomly samples programs from the space defined by sketches and use evolutionary
 search to fine-tune them.</p>
@@ -1881,7 +1881,7 @@ Candidates:
 
 <dl class="py function">
 <dt class="sig sig-object py" id="tvm.auto_scheduler.auto_schedule">
-<span class="sig-prename descclassname"><span class="pre">tvm.auto_scheduler.</span></span><span class="sig-name descname"><span class="pre">auto_schedule</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">search_policy</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em clas [...]
+<span class="sig-prename descclassname"><span class="pre">tvm.auto_scheduler.</span></span><span class="sig-name descname"><span class="pre">auto_schedule</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">task</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">search_policy</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em clas [...]
 <dd><p>THIS API IS DEPRECATED.</p>
 <p>Run auto scheduling search for a task.</p>
 <dl class="field-list simple">
diff --git a/docs/reference/api/typedoc/classes/bytestreamreader.html b/docs/reference/api/typedoc/classes/bytestreamreader.html
index c386b612a..8fdc46313 100644
--- a/docs/reference/api/typedoc/classes/bytestreamreader.html
+++ b/docs/reference/api/typedoc/classes/bytestreamreader.html
@@ -119,7 +119,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -141,7 +141,7 @@
 					<div class="tsd-signature tsd-kind-icon">bytes<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Uint8Array</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/rpc_server.ts#L43">rpc_server.ts:43</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -151,7 +151,7 @@
 					<div class="tsd-signature tsd-kind-icon">offset<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span><span class="tsd-signature-symbol"> = 0</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/rpc_server.ts#L42">rpc_server.ts:42</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/rpc_server.ts#L42">rpc_server.ts:42</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -168,7 +168,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/rpc_server.ts#L63">rpc_server.ts:63</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/rpc_server.ts#L63">rpc_server.ts:63</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">Uint8Array</span></h4>
@@ -185,7 +185,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/rpc_server.ts#L49">rpc_server.ts:49</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/rpc_server.ts#L49">rpc_server.ts:49</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
@@ -202,7 +202,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/rpc_server.ts#L57">rpc_server.ts:57</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/rpc_server.ts#L57">rpc_server.ts:57</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
diff --git a/docs/reference/api/typedoc/classes/cachedcallstack.html b/docs/reference/api/typedoc/classes/cachedcallstack.html
index 1fcf9afa3..d872a1d33 100644
--- a/docs/reference/api/typedoc/classes/cachedcallstack.html
+++ b/docs/reference/api/typedoc/classes/cachedcallstack.html
@@ -144,7 +144,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L223">memory.ts:223</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L223">memory.ts:223</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -172,7 +172,7 @@
 					<div class="tsd-signature tsd-kind-icon">temp<wbr>Args<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><a href="../interfaces/disposable.html" class="tsd-signature-type">Disposable</a><span class="tsd-signature-symbol">&gt;</span><span class="tsd-signature-symbol"> = []</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L208">memory.ts:208</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L208">memory.ts:208</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -194,7 +194,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L312">memory.ts:312</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L312">memory.ts:312</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -226,7 +226,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L284">memory.ts:284</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L284">memory.ts:284</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -262,7 +262,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L388">memory.ts:388</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L388">memory.ts:388</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -300,7 +300,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L376">memory.ts:376</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L376">memory.ts:376</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -340,7 +340,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L267">memory.ts:267</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L267">memory.ts:267</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -373,7 +373,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L243">memory.ts:243</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L243">memory.ts:243</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -390,7 +390,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L321">memory.ts:321</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L321">memory.ts:321</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -422,7 +422,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L252">memory.ts:252</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L252">memory.ts:252</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -444,7 +444,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L359">memory.ts:359</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L359">memory.ts:359</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -470,7 +470,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L342">memory.ts:342</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L342">memory.ts:342</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -496,7 +496,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L350">memory.ts:350</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L350">memory.ts:350</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -522,7 +522,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L326">memory.ts:326</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L326">memory.ts:326</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -548,7 +548,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L363">memory.ts:363</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L363">memory.ts:363</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -574,7 +574,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L346">memory.ts:346</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L346">memory.ts:346</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -600,7 +600,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/memory.ts#L334">memory.ts:334</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/memory.ts#L334">memory.ts:334</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
diff --git a/docs/reference/api/typedoc/classes/dldatatype.html b/docs/reference/api/typedoc/classes/dldatatype.html
index caa4e90d6..7b94cf2cd 100644
--- a/docs/reference/api/typedoc/classes/dldatatype.html
+++ b/docs/reference/api/typedoc/classes/dldatatype.html
@@ -119,7 +119,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L262">runtime.ts:262</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L262">runtime.ts:262</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -147,7 +147,7 @@
 					<div class="tsd-signature tsd-kind-icon">bits<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L260">runtime.ts:260</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L260">runtime.ts:260</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -162,7 +162,7 @@
 					<div class="tsd-signature tsd-kind-icon">code<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L258">runtime.ts:258</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L258">runtime.ts:258</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -177,7 +177,7 @@
 					<div class="tsd-signature tsd-kind-icon">lanes<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L262">runtime.ts:262</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L262">runtime.ts:262</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -199,7 +199,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L279">runtime.ts:279</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L279">runtime.ts:279</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
@@ -216,7 +216,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L270">runtime.ts:270</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L270">runtime.ts:270</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">string</span></h4>
diff --git a/docs/reference/api/typedoc/classes/dldevice.html b/docs/reference/api/typedoc/classes/dldevice.html
index a27a59459..24519cd8e 100644
--- a/docs/reference/api/typedoc/classes/dldevice.html
+++ b/docs/reference/api/typedoc/classes/dldevice.html
@@ -118,7 +118,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L202">runtime.ts:202</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L202">runtime.ts:202</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -146,7 +146,7 @@
 					<div class="tsd-signature tsd-kind-icon">device<wbr>Id<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L200">runtime.ts:200</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L200">runtime.ts:200</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -161,7 +161,7 @@
 					<div class="tsd-signature tsd-kind-icon">device<wbr>Type<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">number</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L198">runtime.ts:198</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L198">runtime.ts:198</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -183,7 +183,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L223">runtime.ts:223</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L223">runtime.ts:223</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -205,7 +205,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L230">runtime.ts:230</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L230">runtime.ts:230</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">string</span></h4>
diff --git a/docs/reference/api/typedoc/classes/environment.html b/docs/reference/api/typedoc/classes/environment.html
index 78a97e6f9..d73e47d92 100644
--- a/docs/reference/api/typedoc/classes/environment.html
+++ b/docs/reference/api/typedoc/classes/environment.html
@@ -125,7 +125,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/environment.ts#L86">environment.ts:86</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/environment.ts#L86">environment.ts:86</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -169,7 +169,7 @@
 					<aside class="tsd-sources">
 						<p>Implementation of <a href="../interfaces/libraryprovider.html">LibraryProvider</a>.<a href="../interfaces/libraryprovider.html#imports">imports</a></p>
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/environment.ts#L70">environment.ts:70</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/environment.ts#L70">environment.ts:70</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -179,7 +179,7 @@
 					<div class="tsd-signature tsd-kind-icon">logger<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-symbol">(</span>msg<span class="tsd-signature-symbol">: </span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">)</span><span class="tsd-signature-symbol"> =&gt; </span><span class="tsd-signature-type">void</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/environment.ts#L69">environment.ts:69</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/environment.ts#L69">environment.ts:69</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-type-declaration">
@@ -210,7 +210,7 @@
 					<div class="tsd-signature tsd-kind-icon">packedCFunc<wbr>Table<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">ctypes.FTVMWasmPackedCFunc</span><span class="tsd-signature-symbol"> | </span><span class="tsd-signature-type">undefined</span><span class="tsd-signature-symbol">&gt;</span><span class="tsd-signature-symbol"> = [undefined,]</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/environment.ts#L78">environment.ts:78</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/environment.ts#L78">environment.ts:78</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -228,7 +228,7 @@
 					<div class="tsd-signature tsd-kind-icon">packedCFunc<wbr>Table<wbr>Free<wbr>Id<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Array</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">number</span><span class="tsd-signature-symbol">&gt;</span><span class="tsd-signature-symbol"> = []</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/environment.ts#L84">environment.ts:84</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/environment.ts#L84">environment.ts:84</a></li>
 						</ul>
 					</aside>
 					<div class="tsd-comment tsd-typography">
@@ -250,7 +250,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/environment.ts#L105">environment.ts:105</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/environment.ts#L105">environment.ts:105</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
diff --git a/docs/reference/api/typedoc/classes/ffilibrary.html b/docs/reference/api/typedoc/classes/ffilibrary.html
index 395aebf4f..28b6011aa 100644
--- a/docs/reference/api/typedoc/classes/ffilibrary.html
+++ b/docs/reference/api/typedoc/classes/ffilibrary.html
@@ -131,7 +131,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L49">runtime.ts:49</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L49">runtime.ts:49</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -156,7 +156,7 @@
 					<div class="tsd-signature tsd-kind-icon">exports<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Record</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">, </span><span class="tsd-signature-type">Function</span><span class="tsd-signature-symbol">&gt;</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L46">runtime.ts:46</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L46">runtime.ts:46</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -166,7 +166,7 @@
 					<div class="tsd-signature tsd-kind-icon">memory<span class="tsd-signature-symbol">:</span> <a href="memory.html" class="tsd-signature-type">Memory</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L45">runtime.ts:45</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L45">runtime.ts:45</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -176,7 +176,7 @@
 					<div class="tsd-signature tsd-kind-icon">wasm32<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">boolean</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L44">runtime.ts:44</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L44">runtime.ts:44</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -186,7 +186,7 @@
 					<div class="tsd-signature tsd-kind-icon">webGPUContext<span class="tsd-signature-symbol">:</span> <a href="webgpucontext.html" class="tsd-signature-type">WebGPUContext</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L47">runtime.ts:47</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L47">runtime.ts:47</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -203,7 +203,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L76">runtime.ts:76</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L76">runtime.ts:76</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -226,7 +226,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L66">runtime.ts:66</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L66">runtime.ts:66</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -243,7 +243,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L84">runtime.ts:84</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L84">runtime.ts:84</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <a href="cachedcallstack.html" class="tsd-signature-type">CachedCallStack</a></h4>
@@ -260,7 +260,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L95">runtime.ts:95</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L95">runtime.ts:95</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-parameters-title">Parameters</h4>
@@ -283,7 +283,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L72">runtime.ts:72</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L72">runtime.ts:72</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">number</span></h4>
diff --git a/docs/reference/api/typedoc/classes/graphexecutor.html b/docs/reference/api/typedoc/classes/graphexecutor.html
index bfab83501..66d7e6713 100644
--- a/docs/reference/api/typedoc/classes/graphexecutor.html
+++ b/docs/reference/api/typedoc/classes/graphexecutor.html
@@ -130,7 +130,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L583">runtime.ts:583</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L583">runtime.ts:583</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -162,7 +162,7 @@
 					<div class="tsd-signature tsd-kind-icon">module<span class="tsd-signature-symbol">:</span> <a href="module.html" class="tsd-signature-type">Module</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L579">runtime.ts:579</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L579">runtime.ts:579</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -179,7 +179,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L654">runtime.ts:654</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L654">runtime.ts:654</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -224,7 +224,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L597">runtime.ts:597</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L597">runtime.ts:597</a></li>
 								</ul>
 							</aside>
 							<h4 class="tsd-returns-title">Returns <span class="tsd-signature-type">void</span></h4>
@@ -241,7 +241,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L631">runtime.ts:631</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L631">runtime.ts:631</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -279,7 +279,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L644">runtime.ts:644</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L644">runtime.ts:644</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -310,7 +310,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L621">runtime.ts:621</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L621">runtime.ts:621</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -332,7 +332,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L609">runtime.ts:609</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L609">runtime.ts:609</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
diff --git a/docs/reference/api/typedoc/classes/instance.html b/docs/reference/api/typedoc/classes/instance.html
index dd3edd11b..a784007ab 100644
--- a/docs/reference/api/typedoc/classes/instance.html
+++ b/docs/reference/api/typedoc/classes/instance.html
@@ -139,7 +139,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
 								<ul>
-									<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L692">runtime.ts:692</a></li>
+									<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L692">runtime.ts:692</a></li>
 								</ul>
 							</aside>
 							<div class="tsd-comment tsd-typography">
@@ -202,7 +202,7 @@
 					<div class="tsd-signature tsd-kind-icon">exports<span class="tsd-signature-symbol">:</span> <span class="tsd-signature-type">Record</span><span class="tsd-signature-symbol">&lt;</span><span class="tsd-signature-type">string</span><span class="tsd-signature-symbol">, </span><span class="tsd-signature-type">Function</span><span class="tsd-signature-symbol">&gt;</span></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L684">runtime.ts:684</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L684">runtime.ts:684</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -212,7 +212,7 @@
 					<div class="tsd-signature tsd-kind-icon">memory<span class="tsd-signature-symbol">:</span> <a href="memory.html" class="tsd-signature-type">Memory</a></div>
 					<aside class="tsd-sources">
 						<ul>
-							<li>Defined in <a href="https://github.com/apache/tvm/blob/c54eea7d0/web/src/runtime.ts#L683">runtime.ts:683</a></li>
+							<li>Defined in <a href="https://github.com/apache/tvm/blob/9c7aaace4/web/src/runtime.ts#L683">runtime.ts:683</a></li>
 						</ul>
 					</aside>
 				</section>
@@ -229,7 +229,7 @@
 						<li class="tsd-description">
 							<aside class="tsd-sources">
... 2323 lines suppressed ...