You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2022/06/24 06:56:39 UTC

[tvm-site] branch asf-site updated: deploying docs (apache/tvm@d2cbdf381b68134951bfd7525c6a3a67838e5bdf)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 2c8aab737 deploying docs (apache/tvm@d2cbdf381b68134951bfd7525c6a3a67838e5bdf)
2c8aab737 is described below

commit 2c8aab737fb2953c4c9ef35527a67ee729e5afe2
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Fri Jun 24 06:56:31 2022 +0000

    deploying docs (apache/tvm@d2cbdf381b68134951bfd7525c6a3a67838e5bdf)
---
 .../how_to/compile_models/from_mxnet.rst.txt       |    2 +-
 .../how_to/compile_models/from_oneflow.rst.txt     |    2 +-
 .../how_to/compile_models/from_paddle.rst.txt      |    2 +-
 .../how_to/compile_models/from_pytorch.rst.txt     |    2 +-
 .../how_to/compile_models/from_tensorflow.rst.txt  |    2 +-
 .../compile_models/sg_execution_times.rst.txt      |   22 +-
 .../deploy_models/deploy_model_on_android.rst.txt  |    2 +-
 .../deploy_object_detection_pytorch.rst.txt        |    4 +-
 .../deploy_models/deploy_prequantized.rst.txt      |    6 +-
 .../deploy_prequantized_tflite.rst.txt             |    4 +-
 .../how_to/deploy_models/deploy_quantized.rst.txt  |    2 +-
 .../deploy_models/deploy_ssd_gluoncv.rst.txt       |    4 +-
 .../deploy_models/sg_execution_times.rst.txt       |   16 +-
 .../extend_tvm/bring_your_own_datatypes.rst.txt    |    2 +-
 .../how_to/extend_tvm/sg_execution_times.rst.txt   |   10 +-
 .../how_to/extend_tvm/use_pass_instrument.rst.txt  |   16 +-
 .../optimize_operators/opt_conv_cuda.rst.txt       |    2 +-
 .../optimize_operators/opt_conv_tensorcore.rst.txt |    2 +-
 .../how_to/optimize_operators/opt_gemm.rst.txt     |   16 +-
 .../optimize_operators/sg_execution_times.rst.txt  |    8 +-
 .../sg_execution_times.rst.txt                     |   14 +-
 .../tune_conv2d_layer_cuda.rst.txt                 |  970 +------
 .../tune_network_cuda.rst.txt                      |    2 +-
 .../tune_network_x86.rst.txt                       |    4 +-
 .../tune_sparse_x86.rst.txt                        |   84 +-
 .../tune_with_autotvm/sg_execution_times.rst.txt   |    8 +-
 .../tune_with_autotvm/tune_conv2d_cuda.rst.txt     |   34 +-
 .../work_with_microtvm/micro_autotune.rst.txt      |   16 +-
 .../how_to/work_with_microtvm/micro_train.rst.txt  |   16 +-
 .../work_with_microtvm/sg_execution_times.rst.txt  |    8 +-
 .../work_with_relay/sg_execution_times.rst.txt     |    6 +-
 .../how_to/work_with_schedules/intrin_math.rst.txt |    2 +-
 .../work_with_schedules/sg_execution_times.rst.txt |   16 +-
 .../how_to/work_with_schedules/tensorize.rst.txt   |    2 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    6 +-
 .../frontend/deploy_classification.rst.txt         |    2 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    6 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../topic/vta/tutorials/sg_execution_times.rst.txt |    6 +-
 .../tutorial/auto_scheduler_matmul_x86.rst.txt     |    2 +-
 docs/_sources/tutorial/autotvm_matmul_x86.rst.txt  |   20 +-
 docs/_sources/tutorial/autotvm_relay_x86.rst.txt   |   54 +-
 .../tutorial/cross_compilation_and_rpc.rst.txt     |    2 +-
 docs/_sources/tutorial/intro_topi.rst.txt          |    2 +-
 docs/_sources/tutorial/sg_execution_times.rst.txt  |   22 +-
 .../tutorial/tensor_expr_get_started.rst.txt       |   44 +-
 docs/commit_hash                                   |    2 +-
 docs/how_to/compile_models/from_mxnet.html         |    2 +-
 docs/how_to/compile_models/from_oneflow.html       |   73 +-
 docs/how_to/compile_models/from_paddle.html        |    2 +-
 docs/how_to/compile_models/from_pytorch.html       |   11 +-
 docs/how_to/compile_models/from_tensorflow.html    |    2 +-
 docs/how_to/compile_models/sg_execution_times.html |   22 +-
 .../deploy_models/deploy_model_on_android.html     |    2 +-
 .../deploy_object_detection_pytorch.html           |   21 +-
 docs/how_to/deploy_models/deploy_prequantized.html |   12 +-
 .../deploy_models/deploy_prequantized_tflite.html  |    4 +-
 docs/how_to/deploy_models/deploy_quantized.html    |    2 +-
 docs/how_to/deploy_models/deploy_ssd_gluoncv.html  |   37 +-
 docs/how_to/deploy_models/sg_execution_times.html  |   16 +-
 .../extend_tvm/bring_your_own_datatypes.html       |    2 +-
 docs/how_to/extend_tvm/sg_execution_times.html     |   10 +-
 docs/how_to/extend_tvm/use_pass_instrument.html    |   16 +-
 docs/how_to/optimize_operators/opt_conv_cuda.html  |    2 +-
 .../optimize_operators/opt_conv_tensorcore.html    |    2 +-
 docs/how_to/optimize_operators/opt_gemm.html       |   16 +-
 .../optimize_operators/sg_execution_times.html     |    8 +-
 .../sg_execution_times.html                        |   14 +-
 .../tune_conv2d_layer_cuda.html                    |  970 +------
 .../tune_with_autoscheduler/tune_network_cuda.html |    2 +-
 .../tune_with_autoscheduler/tune_network_x86.html  |    4 +-
 .../tune_with_autoscheduler/tune_sparse_x86.html   |   84 +-
 .../tune_with_autotvm/sg_execution_times.html      |    8 +-
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.html |   34 +-
 docs/how_to/work_with_microtvm/micro_autotune.html |   16 +-
 docs/how_to/work_with_microtvm/micro_train.html    |   16 +-
 .../work_with_microtvm/sg_execution_times.html     |    8 +-
 .../how_to/work_with_relay/sg_execution_times.html |    6 +-
 docs/how_to/work_with_schedules/intrin_math.html   |    2 +-
 .../work_with_schedules/sg_execution_times.html    |   16 +-
 docs/how_to/work_with_schedules/tensorize.html     |    2 +-
 .../api/doxygen/apply__history__best_8h.html       |    2 +-
 .../api/doxygen/apply__history__best_8h__incl.svg  | 1226 ++++-----
 docs/reference/api/doxygen/arg__info_8h.html       |    5 +-
 .../api/doxygen/arg__info_8h__dep__incl.svg        |  140 +-
 docs/reference/api/doxygen/arg__info_8h__incl.svg  | 2575 ++++++++++---------
 .../reference/api/doxygen/arg__info_8h_source.html |   21 +-
 docs/reference/api/doxygen/array_8h.html           |    2 +-
 docs/reference/api/doxygen/array_8h__dep__incl.svg | 1451 +++++------
 .../api/doxygen/c__runtime__api_8h__dep__incl.svg  |  132 +-
 ...sstvm_1_1meta__schedule_1_1ArgInfo-members.html |   31 +-
 .../classtvm_1_1meta__schedule_1_1ArgInfo.html     |   53 +-
 ...m_1_1meta__schedule_1_1ArgInfo__coll__graph.svg |  105 +-
 ..._1meta__schedule_1_1ArgInfo__inherit__graph.svg |   81 +-
 ...vm_1_1meta__schedule_1_1TensorInfo-members.html |   35 +-
 .../classtvm_1_1meta__schedule_1_1TensorInfo.html  |    7 +-
 ..._1meta__schedule_1_1TensorInfo__coll__graph.svg |  109 +-
 ...eta__schedule_1_1TensorInfo__inherit__graph.svg |   81 +-
 docs/reference/api/doxygen/data__type_8h.html      |    2 +-
 .../api/doxygen/data__type_8h__dep__incl.svg       | 1702 ++++++------
 docs/reference/api/doxygen/database_8h.html        |    2 +-
 docs/reference/api/doxygen/database_8h__incl.svg   | 1096 ++++----
 docs/reference/api/doxygen/dir_000004_000007.html  |    2 +-
 .../dir_4378f18824ae7d4ad48f8d7785cd7ac8_dep.svg   |    4 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5_dep.svg   |    4 +-
 .../api/doxygen/feature__extractor_8h.html         |    2 +-
 .../api/doxygen/feature__extractor_8h__incl.svg    | 2147 ++++++++--------
 docs/reference/api/doxygen/functions_f.html        |    3 +
 docs/reference/api/doxygen/functions_func_f.html   |    5 +-
 docs/reference/api/doxygen/functions_func_t.html   |    2 +-
 docs/reference/api/doxygen/functions_func_v.html   |    4 +-
 docs/reference/api/doxygen/functions_s.html        |    2 +-
 docs/reference/api/doxygen/functions_t.html        |    4 +-
 docs/reference/api/doxygen/functions_v.html        |   12 +-
 .../api/doxygen/functor_8h__dep__incl.svg          |  168 +-
 docs/reference/api/doxygen/ir_2adt_8h.html         |    2 +-
 .../api/doxygen/ir_2adt_8h__dep__incl.svg          | 1216 ++++-----
 docs/reference/api/doxygen/ir_2attrs_8h.html       |    2 +-
 .../api/doxygen/ir_2attrs_8h__dep__incl.svg        |  744 +++---
 .../api/doxygen/ir_2expr_8h__dep__incl.svg         |  112 +-
 docs/reference/api/doxygen/ir_2function_8h.html    |    2 +-
 .../api/doxygen/ir_2function_8h__dep__incl.svg     | 1132 ++++----
 docs/reference/api/doxygen/ir_2module_8h.html      |    2 +-
 .../api/doxygen/ir_2module_8h__dep__incl.svg       | 1218 ++++-----
 .../api/doxygen/ir_2span_8h__dep__incl.svg         |  112 +-
 docs/reference/api/doxygen/ir_2type_8h.html        |    2 +-
 .../api/doxygen/ir_2type_8h__dep__incl.svg         | 1290 +++++-----
 docs/reference/api/doxygen/map_8h.html             |    2 +-
 docs/reference/api/doxygen/map_8h__dep__incl.svg   | 1582 ++++++------
 .../api/doxygen/measure__callback_8h.html          |    2 +-
 .../api/doxygen/measure__callback_8h__incl.svg     | 1378 +++++-----
 .../api/doxygen/measure__candidate_8h.html         |    2 +-
 .../api/doxygen/measure__candidate_8h__incl.svg    | 2437 +++++++++---------
 .../doxygen/meta__schedule_2cost__model_8h.html    |    2 +-
 .../meta__schedule_2cost__model_8h__incl.svg       | 2501 +++++++++---------
 docs/reference/api/doxygen/node_8h.html            |    2 +-
 docs/reference/api/doxygen/node_8h__dep__incl.svg  | 1344 +++++-----
 docs/reference/api/doxygen/object_8h.html          |    2 +-
 .../reference/api/doxygen/object_8h__dep__incl.svg | 1978 +++++++-------
 docs/reference/api/doxygen/optional_8h.html        |    2 +-
 .../api/doxygen/optional_8h__dep__incl.svg         | 1289 +++++-----
 .../api/doxygen/packed__func_8h__dep__incl.svg     |  128 +-
 .../api/doxygen/registry_8h__dep__incl.svg         |   24 +-
 docs/reference/api/doxygen/repr__printer_8h.html   |    2 +-
 .../api/doxygen/repr__printer_8h__dep__incl.svg    | 1296 +++++-----
 docs/reference/api/doxygen/runner_8h.html          |    2 +-
 .../reference/api/doxygen/runner_8h__dep__incl.svg |   72 +-
 docs/reference/api/doxygen/runner_8h__incl.svg     | 1808 +++++++------
 .../api/doxygen/runtime_2container_2adt_8h.html    |    2 +-
 .../runtime_2container_2adt_8h__dep__incl.svg      | 1211 ++++-----
 .../api/doxygen/runtime_2container_2base_8h.html   |    2 +-
 .../runtime_2container_2base_8h__dep__incl.svg     | 1874 +++++++-------
 docs/reference/api/doxygen/runtime_2memory_8h.html |    2 +-
 .../api/doxygen/runtime_2memory_8h__dep__incl.svg  | 1668 ++++++------
 .../api/doxygen/runtime_2module_8h__dep__incl.svg  |  128 +-
 docs/reference/api/doxygen/search/all_10.js        |    2 +-
 docs/reference/api/doxygen/search/all_11.js        |    2 +-
 docs/reference/api/doxygen/search/all_14.js        |    8 +-
 docs/reference/api/doxygen/search/all_15.js        |    6 +-
 docs/reference/api/doxygen/search/all_16.js        |    2 +-
 docs/reference/api/doxygen/search/all_17.js        |    4 +-
 docs/reference/api/doxygen/search/all_7.js         |    1 +
 docs/reference/api/doxygen/search/functions_10.js  |    2 +-
 docs/reference/api/doxygen/search/functions_13.js  |    2 +-
 docs/reference/api/doxygen/search/functions_14.js  |    2 +-
 docs/reference/api/doxygen/search/functions_15.js  |    2 +-
 docs/reference/api/doxygen/search/functions_16.js  |    2 +-
 docs/reference/api/doxygen/search/functions_6.js   |    1 +
 docs/reference/api/doxygen/search/functions_f.js   |    2 +-
 .../reference/api/doxygen/search__strategy_8h.html |    2 +-
 .../api/doxygen/search__strategy_8h__incl.svg      | 2696 ++++++++++----------
 docs/reference/api/doxygen/source__map_8h.html     |    2 +-
 .../api/doxygen/source__map_8h__dep__incl.svg      | 1226 ++++-----
 .../reference/api/doxygen/string_8h__dep__incl.svg |  300 +--
 .../doxygen/structural__equal_8h__dep__incl.svg    |   36 +-
 .../api/doxygen/structural__hash_8h__dep__incl.svg |   36 +-
 docs/reference/api/doxygen/task__scheduler_8h.html |    2 +-
 .../api/doxygen/task__scheduler_8h__incl.svg       | 1470 +++++------
 docs/reference/api/doxygen/tune__context_8h.html   |    2 +-
 .../api/doxygen/tune__context_8h__incl.svg         | 1392 +++++-----
 docs/reference/api/python/auto_scheduler.html      |    4 +-
 .../api/typedoc/classes/bytestreamreader.html      |   12 +-
 .../api/typedoc/classes/cachedcallstack.html       |   34 +-
 docs/reference/api/typedoc/classes/dldatatype.html |   12 +-
 docs/reference/api/typedoc/classes/dldevice.html   |   10 +-
 .../reference/api/typedoc/classes/environment.html |   12 +-
 docs/reference/api/typedoc/classes/ffilibrary.html |   20 +-
 .../api/typedoc/classes/graphexecutor.html         |   16 +-
 docs/reference/api/typedoc/classes/instance.html   |   40 +-
 docs/reference/api/typedoc/classes/memory.html     |   34 +-
 docs/reference/api/typedoc/classes/module.html     |   10 +-
 docs/reference/api/typedoc/classes/ndarray.html    |   22 +-
 .../api/typedoc/classes/packedfunccell.html        |    6 +-
 docs/reference/api/typedoc/classes/rpcserver.html  |   14 +-
 docs/reference/api/typedoc/classes/scalar.html     |    6 +-
 .../api/typedoc/classes/webgpucontext.html         |   12 +-
 docs/reference/api/typedoc/enums/argtypecode.html  |   30 +-
 .../api/typedoc/enums/aynccallbackcode.html        |    4 +-
 .../api/typedoc/enums/dldatatypecode.html          |    8 +-
 .../api/typedoc/enums/rpcserverstate.html          |   12 +-
 docs/reference/api/typedoc/enums/sizeof.html       |   18 +-
 docs/reference/api/typedoc/index.html              |  112 +-
 .../api/typedoc/interfaces/disposable.html         |    2 +-
 .../api/typedoc/interfaces/functioninfo.html       |    6 +-
 .../api/typedoc/interfaces/libraryprovider.html    |    4 +-
 docs/searchindex.js                                |    2 +-
 .../vta/tutorials/autotvm/sg_execution_times.html  |    6 +-
 .../tutorials/frontend/deploy_classification.html  |    2 +-
 .../vta/tutorials/frontend/deploy_detection.html   |    2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    6 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    6 +-
 docs/topic/vta/tutorials/sg_execution_times.html   |    6 +-
 docs/tutorial/auto_scheduler_matmul_x86.html       |    2 +-
 docs/tutorial/autotvm_matmul_x86.html              |   20 +-
 docs/tutorial/autotvm_relay_x86.html               |  258 +-
 docs/tutorial/cross_compilation_and_rpc.html       |    2 +-
 docs/tutorial/intro_topi.html                      |    2 +-
 docs/tutorial/sg_execution_times.html              |   26 +-
 docs/tutorial/tensor_expr_get_started.html         |   44 +-
 220 files changed, 23981 insertions(+), 24786 deletions(-)

diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index caedc7cf8..6bdbdba32 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -114,7 +114,7 @@ In this section, we download a pretrained imagenet model and classify an image.
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipa032f37d-d3ba-49fe-9bf4-ee085ae5cd9d from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipbd6d80df-b392-4d2e-bfba-6feeb2255041 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
     x (1, 3, 224, 224)
 
 
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index 1bd03f8d1..b2e68dd77 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -112,7 +112,7 @@ Load a pretrained OneFlow model and save model
  .. code-block:: none
 
     Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
      0%|          | 16.0k/41.5M [00:00<07:38, 94.9kB/s]
      0%|          | 48.0k/41.5M [00:00<04:49, 150kB/s] 
      0%|          | 96.0k/41.5M [00:00<03:25, 211kB/s]
      0%|          | 200k/41.5M [00:00<01:57, 370kB/s] 
      1%|          | 408k/41.5M [00:00<01:03, 679kB/s]
      2%|1         | 832k/41.5M [00:01<00:32, 1.30MB/s]
      4%|3         | 1.64M/41.5M [00:01<00:16, 2.51MB/s]
      7%|7         | 3.11M/41.5M [00:01<00:08, 4.54MB/s]
     11%|#1        | 4.59M/41.5M [00:01<00:06, 5.90MB/s]
     15%|#4        | 6.05M/41.5M [00:01<00:05, 6.82MB/s]
     18%|#8        | 7.52M/41.5M [00:01<00:04, 7.45MB/s]
     22%|##1       | 9.00M/41.5M [00:02<00:04, 7.89MB/s]
     25%|##5       | 10.5M/41.5M [00:02<00:03, 8.19MB/s]
     29%|##8       | 11.9M/41.5M [00:02<00:03, 8.40MB/s]
     32%|###2      | 13.4M/41.5M [00:02<00:03, 8.54MB/s]
     36%|###5      | 14.9M/41.5M [00:02<00:03, 8.64MB/s]
     39%|###9      | 16.3M/41.5M [00:02<0
 0:03, 8.71MB/s]
     43%|####2     | 17.8M/41.5M [00:03<00:02, 8.77MB/s]
     46%|####6     | 19.3M/41.5M [00:03<00:02, 8.80MB/s]
     50%|#####     | 20.8M/41.5M [00:03<00:02, 8.82MB/s]
     54%|#####3    | 22.2M/41.5M [00:03<00:02, 8.84MB/s]
     57%|#####7    | 23.7M/41.5M [00:03<00:02, 8.85MB/s]
     61%|######    | 25.2M/41.5M [00:03<00:01, 8.85MB/s]
     64%|######4   | 26.6M/41.5M [00:04<00:01, 8.86MB/s]
     68%|######7   | 28.1M/41.5M [00:04<00:01, 8.87MB/s]
     71%|#######1  | 29.6M/41.5M [00:04<00:01, 10.0MB/s]
     74%|#######4  | 30.8M/41.5M [00:04<00:01, 10.6MB/s]
     77%|#######6  | 31.9M/41.5M [00:04<00:01, 9.68MB/s]
     79%|#######9  | 32.9M/41.5M [00:04<00:01, 8.44MB/s]
     82%|########1 | 34.0M/41.5M [00:05<00:00, 7.96MB/s]
     85%|########5 | 35.4M/41.5M [00:05<00:00, 8.26MB/s]
     89%|########8 | 36.9M/41.5M [00:05<00:00, 8.46MB/s]
     93%|#########2| 38.4M/41.5M [00:05<00:00, 8.58MB/s]
     96%|#########6| 39.9M/41.5M [00:05<00:00, 8.67MB/s]
    100%|###
 ######9| 41.3M/41.5M [00:05<00:00, 8.74MB/s]
    100%|##########| 41.5M/41.5M [00:05<00:00, 7.37MB/s]
+
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
      0%|          | 16.0k/41.5M [00:00<08:27, 85.6kB/s]
      0%|          | 48.0k/41.5M [00:00<05:20, 136kB/s] 
      0%|          | 96.0k/41.5M [00:00<03:48, 190kB/s]
      0%|          | 160k/41.5M [00:00<02:53, 250kB/s] 
      1%|          | 272k/41.5M [00:00<01:55, 375kB/s]
      1%|1         | 520k/41.5M [00:01<01:01, 696kB/s]
      2%|2         | 0.99M/41.5M [00:01<00:31, 1.33MB/s]
      5%|4         | 1.98M/41.5M [00:01<00:15, 2.63MB/s]
      8%|8         | 3.48M/41.5M [00:01<00:09, 4.35MB/s]
     12%|#1        | 4.95M/41.5M [00:01<00:06, 5.49MB/s]
     16%|#5        | 6.45M/41.5M [00:02<00:05, 6.31MB/s]
     19%|#9        | 7.95M/41.5M [00:02<00:05, 6.86MB/s]
     23%|##2       | 9.44M/41.5M [00:02<00:04, 7.25MB/s]
     26%|##6       | 10.9M/41.5M [00:02<00:04, 7.52MB/s]
     30%|##9       | 12.4M/41.5M [00:02<00:03, 7.70MB/s]
     34%|###3      | 13.9M/41.5M [00:03<00:03, 7.83MB/s]
     37%|###7      | 15.4M/41.5M [00:03<00
 :03, 7.92MB/s]
     41%|####      | 16.9M/41.5M [00:03<00:03, 7.99MB/s]
     44%|####4     | 18.4M/41.5M [00:03<00:03, 8.03MB/s]
     48%|####7     | 19.9M/41.5M [00:03<00:02, 8.06MB/s]
     52%|#####1    | 21.4M/41.5M [00:04<00:02, 8.09MB/s]
     55%|#####5    | 22.9M/41.5M [00:04<00:02, 8.82MB/s]
     59%|#####8    | 24.3M/41.5M [00:04<00:01, 9.86MB/s]
     61%|######1   | 25.3M/41.5M [00:04<00:01, 9.15MB/s]
     63%|######3   | 26.2M/41.5M [00:04<00:02, 7.74MB/s]
     66%|######5   | 27.3M/41.5M [00:04<00:01, 7.98MB/s]
     69%|######9   | 28.8M/41.5M [00:04<00:01, 9.32MB/s]
     72%|#######1  | 29.7M/41.5M [00:05<00:01, 8.59MB/s]
     74%|#######3  | 30.6M/41.5M [00:05<00:01, 7.25MB/s]
     77%|#######6  | 31.8M/41.5M [00:05<00:01, 7.08MB/s]
     80%|########  | 33.3M/41.5M [00:05<00:01, 7.43MB/s]
     84%|########3 | 34.8M/41.5M [00:05<00:00, 7.65MB/s]
     87%|########7 | 36.3M/41.5M [00:05<00:00, 7.80MB/s]
     91%|#########1| 37.8M/41.5M [00:06<00:00, 7.91MB/s]
     95%|####
 #####4| 39.3M/41.5M [00:06<00:00, 7.96MB/s]
     98%|#########8| 40.8M/41.5M [00:06<00:00, 8.01MB/s]
    100%|##########| 41.5M/41.5M [00:06<00:00, 6.64MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_paddle.rst.txt b/docs/_sources/how_to/compile_models/from_paddle.rst.txt
index 11ab41566..4f97f5bed 100644
--- a/docs/_sources/how_to/compile_models/from_paddle.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_paddle.rst.txt
@@ -235,7 +235,7 @@ Look up prediction top 1 index in 1000 class synset.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  7.018 seconds)
+   **Total running time of the script:** ( 1 minutes  7.415 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_paddle.py:
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index ac1f18241..c04b0203c 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -93,7 +93,7 @@ Load a pretrained PyTorch model
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     43%|####2     | 19.0M/44.7M [00:00<00:00, 200MB/s]
     94%|#########4| 42.1M/44.7M [00:00<00:00, 225MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 223MB/s]
+
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
      3%|2         | 1.14M/44.7M [00:00<00:03, 11.9MB/s]
      7%|7         | 3.14M/44.7M [00:00<00:02, 17.2MB/s]
     15%|#4        | 6.49M/44.7M [00:00<00:01, 25.2MB/s]
     26%|##6       | 11.8M/44.7M [00:00<00:00, 37.3MB/s]
     46%|####6     | 20.8M/44.7M [00:00<00:00, 57.6MB/s]
     64%|######3   | 28.5M/44.7M [00:00<00:00, 64.6MB/s]
     95%|#########4| 42.4M/44.7M [00:00<00:00, 90.9MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 65.0MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 7b2981fa9..727d7a007 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -422,7 +422,7 @@ Run the corresponding model on tensorflow
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  2.395 seconds)
+   **Total running time of the script:** ( 1 minutes  2.449 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index 3ff8a8d7f..c9c9d2908 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**05:24.208** total execution time for **how_to_compile_models** files:
+**05:30.277** total execution time for **how_to_compile_models** files:
 
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 01:07.018 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 01:07.415 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:02.395 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:02.449 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 00:57.323 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 00:59.398 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:32.183 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:32.694 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:24.387 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:24.820 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:23.782 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:23.188 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:21.703 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:22.603 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:19.742 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:20.546 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:13.015 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:14.633 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.659 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.532 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index be89257a8..f3e446686 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -440,7 +440,7 @@ Execute on TVM
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      16.8272      16.9435      17.2626      16.0832       0.3775   
+      16.6404      16.6547      16.7156      16.5017       0.0678   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index ef8ec549c..0f832be07 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -122,7 +122,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
      0%|          | 0.00/170M [00:00<?, ?B/s]
     11%|#1        | 18.8M/170M [00:00<00:00, 197MB/s]
     25%|##4       | 42.3M/170M [00:00<00:00, 226MB/s]
     38%|###7      | 63.9M/170M [00:00<00:00, 204MB/s]
     53%|#####3    | 90.0M/170M [00:00<00:00, 230MB/s]
     66%|######6   | 112M/170M [00:00<00:00, 210MB/s] 
     78%|#######8  | 133M/170M [00:00<00:00, 189MB/s]
     89%|########9 | 151M/170M [00:00<00:00, 188MB/s]
    100%|##########| 170M/170M [00:00<00:00, 206MB/s]
+
      0%|          | 0.00/170M [00:00<?, ?B/s]
      2%|1         | 2.94M/170M [00:00<00:05, 30.6MB/s]
      5%|5         | 8.88M/170M [00:00<00:03, 49.2MB/s]
     12%|#1        | 20.1M/170M [00:00<00:01, 80.6MB/s]
     23%|##2       | 38.7M/170M [00:00<00:01, 125MB/s] 
     34%|###4      | 58.3M/170M [00:00<00:00, 154MB/s]
     46%|####5     | 77.7M/170M [00:00<00:00, 171MB/s]
     57%|#####6    | 96.7M/170M [00:00<00:00, 180MB/s]
     68%|######8   | 116M/170M [00:00<00:00, 187MB/s] 
     80%|#######9  | 135M/170M [00:00<00:00, 192MB/s]
     91%|#########1| 155M/170M [00:01<00:00, 196MB/s]
    100%|##########| 170M/170M [00:01<00:00, 165MB/s]
     /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
       for i in range(dim)
     /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -291,7 +291,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  55.642 seconds)
+   **Total running time of the script:** ( 3 minutes  6.174 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index a119fb668..624552f26 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -219,7 +219,7 @@ training. Other models require a full post training calibration.
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     28%|##7       | 3.76M/13.6M [00:00<00:00, 39.4MB/s]
     76%|#######5  | 10.2M/13.6M [00:00<00:00, 56.2MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 56.8MB/s]
+
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
      6%|6         | 880k/13.6M [00:00<00:01, 8.92MB/s]
     19%|#8        | 2.51M/13.6M [00:00<00:00, 13.8MB/s]
     40%|####      | 5.46M/13.6M [00:00<00:00, 21.6MB/s]
     80%|########  | 10.8M/13.6M [00:00<00:00, 35.3MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 32.6MB/s]
 
 
 
@@ -399,7 +399,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      90.3291      90.2933      91.0451      90.1698       0.1479   
+      90.5948      90.5228      93.5147      90.3220       0.3339   
                
 
 
@@ -448,7 +448,7 @@ TODO
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  7.132 seconds)
+   **Total running time of the script:** ( 1 minutes  10.519 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index fa1655642..7aeb90723 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -426,7 +426,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      120.2484     120.0720     126.9187     119.4935      0.8613   
+      121.0160     120.9833     122.6799     120.1841      0.4180   
                
 
 
@@ -463,7 +463,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  5.377 seconds)
+   **Total running time of the script:** ( 2 minutes  0.313 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index 197d02ec8..73bb612d1 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -254,7 +254,7 @@ We create a Relay VM to build and execute the model.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  12.196 seconds)
+   **Total running time of the script:** ( 1 minutes  14.345 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index ff4f4648e..48585d4ed 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -157,7 +157,7 @@ Convert and compile model for CPU.
             data: None
       input_sym_arg_type = in_param.infer_type()[0]
     Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|4         | 5767/132723 [00:00<00:02, 57666.76KB/s]
     10%|#         | 13759/132723 [00:00<00:01, 70754.06KB/s]
     16%|#6        | 21882/132723 [00:00<00:01, 75535.42KB/s]
     23%|##2       | 30005/132723 [00:00<00:01, 77777.59KB/s]
     29%|##8       | 38151/132723 [00:00<00:01, 79103.05KB/s]
     35%|###4      | 46259/132723 [00:00<00:01, 79772.93KB/s]
     41%|####      | 54350/132723 [00:00<00:00, 80143.23KB/s]
     47%|####7     | 62512/132723 [00:00<00:00, 80610.73KB/s]
     53%|#####3    | 70648/132723 [00:00<00:00, 80842.38KB/s]
     59%|#####9    | 78823/132723 [00:01<00:00, 81119.78KB/s]
     66%|######5   | 86935/132723 [00:01<00:00, 81071.90KB/s]
     72%|#######1  | 95043/132723 [00:01<00:00, 80891.95KB/s]
     78%|#######7  | 103133/132723 [00:01<00:00, 80831.41KB/s]
     84%|########3 | 111221/132723 [00:01<00:00, 80844.69KB/s]
     90%|######### | 119501/132723 [00:01<00:00, 81431.11KB/s]
     96%|########
 #6| 127671/132723 [00:01<00:00, 81509.19KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 79764.98KB/s]
+
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|3         | 5263/132723 [00:00<00:02, 52624.28KB/s]
     10%|9         | 13054/132723 [00:00<00:01, 67491.64KB/s]
     16%|#5        | 21085/132723 [00:00<00:01, 73335.68KB/s]
     22%|##1       | 29149/132723 [00:00<00:01, 76214.38KB/s]
     28%|##7       | 37121/132723 [00:00<00:01, 77474.02KB/s]
     34%|###3      | 44869/132723 [00:00<00:01, 76415.03KB/s]
     40%|###9      | 52829/132723 [00:00<00:01, 77441.08KB/s]
     46%|####5     | 60802/132723 [00:00<00:00, 78162.09KB/s]
     52%|#####1    | 68779/132723 [00:00<00:00, 78660.90KB/s]
     58%|#####7    | 76647/132723 [00:01<00:00, 78492.92KB/s]
     64%|######3   | 84498/132723 [00:01<00:00, 78016.11KB/s]
     70%|######9   | 92302/132723 [00:01<00:00, 77820.87KB/s]
     75%|#######5  | 100086/132723 [00:01<00:00, 77590.94KB/s]
     81%|########1 | 107846/132723 [00:01<00:00, 77414.04KB/s]
     87%|########7 | 115588/132723 [00:01<00:00, 77258.94KB/s]
     93%|########
 #2| 123315/132723 [00:01<00:00, 77236.76KB/s]
     99%|#########8| 131081/132723 [00:01<00:00, 77360.57KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 76656.68KB/s]
 
 
 
@@ -240,7 +240,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  17.296 seconds)
+   **Total running time of the script:** ( 2 minutes  24.372 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index 25ecc4c2b..e97d5230f 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,22 +5,22 @@
 
 Computation times
 =================
-**10:30.340** total execution time for **how_to_deploy_models** files:
+**10:48.811** total execution time for **how_to_deploy_models** files:
 
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 02:55.642 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:06.174 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 02:17.296 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 02:24.372 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 02:05.377 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 02:00.313 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:12.196 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:14.345 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:07.132 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:10.519 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:30.069 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:29.684 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:22.622 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:23.399 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)                                     | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index 9d8758d5a..b6cb30f62 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -463,7 +463,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipf8e938c7-37eb-403a-9fff-f2181100ed36 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipd7544d1b-4b64-40cc-8ec0-ece6d947d485 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 
 
 
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index 2fcab6c10..c57385466 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:39.974** total execution time for **how_to_extend_tvm** files:
+**00:41.837** total execution time for **how_to_extend_tvm** files:
 
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:36.858 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:38.203 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.196 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.362 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:00.914 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:01.265 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.006 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.007 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index ad48cd78d..f77ee622f 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -215,10 +215,10 @@ profile the execution time of each passes.
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6813us [6813us] (46.23%; 46.23%)
-    FoldScaleAxis: 7925us [7us] (53.77%; 53.77%)
-            FoldConstant: 7918us [1609us] (53.73%; 99.91%)
-                    InferType: 6309us [6309us] (42.81%; 79.68%)
+    InferType: 7082us [7082us] (45.55%; 45.55%)
+    FoldScaleAxis: 8467us [8us] (54.45%; 54.45%)
+            FoldConstant: 8459us [1599us] (54.40%; 99.90%)
+                    InferType: 6860us [6860us] (44.12%; 81.10%)
 
 
 
@@ -257,10 +257,10 @@ Refer to following sections and :py:func:`tvm.instrument.pass_instrument` for th
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6324us [6324us] (44.61%; 44.61%)
-    FoldScaleAxis: 7853us [5us] (55.39%; 55.39%)
-            FoldConstant: 7848us [1609us] (55.36%; 99.94%)
-                    InferType: 6239us [6239us] (44.01%; 79.49%)
+    InferType: 7021us [7021us] (45.79%; 45.79%)
+    FoldScaleAxis: 8313us [7us] (54.21%; 54.21%)
+            FoldConstant: 8306us [1646us] (54.17%; 99.92%)
+                    InferType: 6660us [6660us] (43.43%; 80.18%)
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index 1b1c61d48..2ccd682e3 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -327,7 +327,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 54.147519 ms
+    Convolution: 54.170101 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index e0c6216bd..cf83eab5d 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -658,7 +658,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 7.228174 ms
+    conv2d with tensor core: 8.953879 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index 5ae0a316e..4810f1da0 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -130,8 +130,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.018909
-    Baseline: 3.540226
+    Numpy running time: 0.019942
+    Baseline: 3.478523
 
 
 
@@ -226,7 +226,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.298191
+    Opt1: 0.326673
 
 
 
@@ -329,7 +329,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.336718
+    Opt2: 0.346013
 
 
 
@@ -425,7 +425,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.119759
+    Opt3: 0.136822
 
 
 
@@ -550,7 +550,7 @@ flattening.
 
  .. code-block:: none
 
-    Opt4: 0.111192
+    Opt4: 0.112451
 
 
 
@@ -672,7 +672,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.112104
+    Opt5: 0.113851
 
 
 
@@ -797,7 +797,7 @@ Futhermore, we can also utilize multi-core processors to do the thread-level par
 
  .. code-block:: none
 
-    Opt6: 0.145330
+    Opt6: 0.146176
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index 7485deac4..aa4906bd1 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:35.000** total execution time for **how_to_optimize_operators** files:
+**00:35.504** total execution time for **how_to_optimize_operators** files:
 
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:32.637 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:33.306 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.323 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.213 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.040 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:00.985 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index 9d58c3bee..b4d3c3517 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,18 +5,18 @@
 
 Computation times
 =================
-**05:09.410** total execution time for **how_to_tune_with_autoscheduler** files:
+**05:19.865** total execution time for **how_to_tune_with_autoscheduler** files:
 
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 02:31.305 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 02:38.800 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:20.772 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:22.662 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 00:43.249 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 00:43.835 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:16.972 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:16.758 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:08.567 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:08.951 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:08.545 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:08.859 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index 4978a4af1..b4474226c 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -239,483 +239,43 @@ cooperative fetching, unrolling and operator fusion.
                  compute: Buffer(compute_2: Pointer(float32), float32, [25088], [])}
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
       preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
-      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 28;
-      allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
-      allocate(pad_temp.shared: Pointer(shared float32), float32, [72]), storage_scope = shared;
-      allocate(kernel.shared: Pointer(shared float32), float32, [3072]), storage_scope = shared;
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64 {
-        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope="local", align=32)[0] = 0f32
+      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 16;
+      allocate(conv2d_nchw: Pointer(local float32), float32, [4]), storage_scope = local;
+      allocate(pad_temp.shared: Pointer(shared float32), float32, [504]), storage_scope = shared;
+      allocate(kernel.shared: Pointer(shared float32), float32, [768]), storage_scope = shared;
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 392 {
+        conv2d_nchw_1: Buffer(conv2d_nchw, float32, [4], [], scope="local", align=16)[0] = 0f32
         conv2d_nchw_1[1] = 0f32
         conv2d_nchw_1[2] = 0f32
         conv2d_nchw_1[3] = 0f32
-        conv2d_nchw_1[4] = 0f32
-        conv2d_nchw_1[5] = 0f32
-        conv2d_nchw_1[6] = 0f32
-        conv2d_nchw_1[7] = 0f32
-        conv2d_nchw_1[8] = 0f32
-        conv2d_nchw_1[9] = 0f32
-        conv2d_nchw_1[10] = 0f32
-        conv2d_nchw_1[11] = 0f32
-        conv2d_nchw_1[12] = 0f32
-        conv2d_nchw_1[13] = 0f32
         for (rc.outer.outer: int32, 0, 64) {
-          for (ry.outer.outer: int32, 0, 3) {
-            let cse_var_2: int32 = (rc.outer.outer*72)
-            let cse_var_1: int32 = (ry.outer.outer*3)
-             {
-              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64 {
-                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
-                  pad_temp.shared_1: Buffer(pad_temp.shared, float32, [72], [], scope="shared")[(threadIdx.x_1*4)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv((threadIdx.x_1*4), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f3 [...]
-                }
-                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
-                  pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 1), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
-                }
-                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
-                  pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 2), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
-                }
-                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
-                  pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 9))) && (floormod(((threadIdx.x_1*4) + 3), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 3), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0f32, dtype=float32)
-                }
+          for (rx.outer.outer: int32, 0, 3) {
+            attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 392;
+            pad_temp.shared_1: Buffer(pad_temp.shared, float32, [504], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else(((((7 <= floormod(threadIdx.x_1, 63)) && (floormod(threadIdx.x_1, 63) < 56)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[(((((rc.outer.outer*392) + (floordiv(threadIdx.x_1, 63)*49)) + rx.outer.outer) + floormod(threadIdx.x_1, 63)) - 8)], 0f32, dtype=float32)
+            attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 392;
+            if @tir.likely((threadIdx.x_1 < 112), dtype=bool) {
+              pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else(((((1 <= floormod((floordiv(threadIdx.x_1, 7) + 2), 9)) && (floormod((floordiv(threadIdx.x_1, 7) + 2), 9) < 8)) && (1 <= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) && ((rx.outer.outer + floormod(threadIdx.x_1, 7)) < 8)), data[((((((rc.outer.outer*392) + (floordiv((floordiv(threadIdx.x_1, 7) + 56), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) + floormod(threadIdx.x_1, 7)) - 8) [...]
+            }
+            attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 392;
+            kernel.shared_1: Buffer(kernel.shared, float32, [768], [], scope="shared")[threadIdx.x_2] = kernel[(((((blockIdx.x*147456) + (floordiv(threadIdx.x_2, 24)*4608)) + (rc.outer.outer*72)) + (floormod(threadIdx.x_2, 24)*3)) + rx.outer.outer)]
+            attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 392;
+            if @tir.likely((threadIdx.x_2 < 376), dtype=bool) {
+              kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[((((((blockIdx.x*147456) + (floordiv((floordiv(threadIdx.x_2, 8) + 49), 3)*4608)) + (rc.outer.outer*72)) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+            }
+            for (rc.outer.inner: int32, 0, 4) {
+              for (ff.outer.inner: int32, 0, 4) {
+                conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[((rc.outer.inner*126) + floormod(threadIdx.x, 49))]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6))]))
+                conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[(((rc.outer.inner*126) + floormod(threadIdx.x, 49)) + 7)]*kernel.shared_1[((((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6)) + 1)]))
+                conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[(((rc.outer.inner*126) + floormod(threadIdx.x, 49)) + 14)]*kernel.shared_1[((((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6)) + 2)]))
+                conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[(((rc.outer.inner*126) + floormod(threadIdx.x, 49)) + 63)]*kernel.shared_1[((((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6)) + 3)]))
+                conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[(((rc.outer.inner*126) + floormod(threadIdx.x, 49)) + 70)]*kernel.shared_1[((((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6)) + 4)]))
+                conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[(((rc.outer.inner*126) + floormod(threadIdx.x, 49)) + 77)]*kernel.shared_1[((((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6)) + 5)]))
               }
-              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1: Buffer(kernel.shared, float32, [3072], [], scope="shared")[threadIdx.x_2] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 64)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 8), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 128)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 16), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 32), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 192)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 36864)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 256)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 32), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 64), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 320)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 40), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 80), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 384)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 73728)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 56), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 112), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 512)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 64), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 128), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 576)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 110592)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 640)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 80), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 160), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 704)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 88), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 176), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 768)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 147456)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 832)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 104), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 208), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 112), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 224), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 960)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 184320)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 128), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 256), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 136), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 272), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1152)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 221184)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1216)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 152), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 304), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1280)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 160), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 320), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1408)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 176), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 352), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1472)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 184), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 368), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1536)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 294912)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1600)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 200), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 400), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1664)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 208), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 416), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1728)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 331776)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1792)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 224), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 448), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1856)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 232), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 464), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1920)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 368640)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 1984)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 248), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 496), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2048)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 256), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 512), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2112)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 405504)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2176)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 272), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 544), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2240)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 280), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 560), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2304)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 442368)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2368)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 296), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 592), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2432)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 304), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 608), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2496)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 479232)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2560)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 320), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 640), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2624)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 328), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 656), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2688)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 516096)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2752)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 344), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 688), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2816)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 352), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 704), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2880)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 552960)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 2944)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 368), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 736), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
-              kernel.shared_1[(threadIdx.x_2 + 3008)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 376), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 752), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*48)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 47)]))
             }
           }
         }
-        for (i1.inner: int32, 0, 2) {
-          for (i3.inner: int32, 0, 7) {
-            compute[(((((floordiv(blockIdx.x, 7)*6272) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((floordiv(blockIdx.x, 7)*128) + (threadIdx.x*2)) + i1.inner)]), 0f32)
-          }
+        for (i1.inner: int32, 0, 4) {
+          compute[((((blockIdx.x*1568) + (floordiv(threadIdx.x, 49)*196)) + (i1.inner*49)) + floormod(threadIdx.x, 49))] = max((conv2d_nchw_1[i1.inner] + bias[(((blockIdx.x*32) + (floordiv(threadIdx.x, 49)*4)) + i1.inner)]), 0f32)
         }
       }
     }
@@ -770,7 +330,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.363 ms
+    Execution time of this operator: 0.337 ms
 
 
 
@@ -819,35 +379,35 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
     conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
-    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
+    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=4)
+    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
     conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
     conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
     conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
+    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
     conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
     conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
-    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
+    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
+    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
     conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
     conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
     conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
-    conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
+    conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
     conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
     conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
-    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
+    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
     s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2 [...]
     compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
     compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
     compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
-    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
+    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=4)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
     compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
     compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
-    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
     compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
-    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
+    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
+    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
     compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
     s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
     s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -867,14 +427,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=392)
     s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=392)
     s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
-    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 512)
+    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 16)
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
 
     CUDA source code:
@@ -892,430 +452,40 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       #define int64_t long long
       #define uint64_t unsigned long long
     #endif
-    extern "C" __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-      float conv2d_nchw[14];
-      __shared__ float pad_temp_shared[72];
-      __shared__ float kernel_shared[3072];
+    extern "C" __global__ void __launch_bounds__(392) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+      float conv2d_nchw[4];
+      __shared__ float pad_temp_shared[504];
+      __shared__ float kernel_shared[768];
       conv2d_nchw[0] = 0.000000e+00f;
       conv2d_nchw[1] = 0.000000e+00f;
       conv2d_nchw[2] = 0.000000e+00f;
       conv2d_nchw[3] = 0.000000e+00f;
-      conv2d_nchw[4] = 0.000000e+00f;
-      conv2d_nchw[5] = 0.000000e+00f;
-      conv2d_nchw[6] = 0.000000e+00f;
-      conv2d_nchw[7] = 0.000000e+00f;
-      conv2d_nchw[8] = 0.000000e+00f;
-      conv2d_nchw[9] = 0.000000e+00f;
-      conv2d_nchw[10] = 0.000000e+00f;
-      conv2d_nchw[11] = 0.000000e+00f;
-      conv2d_nchw[12] = 0.000000e+00f;
-      conv2d_nchw[13] = 0.000000e+00f;
       for (int rc_outer_outer = 0; rc_outer_outer < 64; ++rc_outer_outer) {
-        for (int ry_outer_outer = 0; ry_outer_outer < 3; ++ry_outer_outer) {
+        for (int rx_outer_outer = 0; rx_outer_outer < 3; ++rx_outer_outer) {
           __syncthreads();
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) * 4) % 9))) && (((((int)threadIdx.x) * 4) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
-          }
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 1) % 9))) && ((((((int)threadIdx.x) * 4) + 1) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
+          pad_temp_shared[((int)threadIdx.x)] = (((((7 <= (((int)threadIdx.x) % 63)) && ((((int)threadIdx.x) % 63) < 56)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[(((((rc_outer_outer * 392) + ((((int)threadIdx.x) / 63) * 49)) + rx_outer_outer) + (((int)threadIdx.x) % 63)) - 8)] : 0.000000e+00f);
+          if (((int)threadIdx.x) < 112) {
+            pad_temp_shared[(((int)threadIdx.x) + 392)] = (((((1 <= (((((int)threadIdx.x) / 7) + 2) % 9)) && ((((((int)threadIdx.x) / 7) + 2) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 392) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 2) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
           }
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 2) % 9))) && ((((((int)threadIdx.x) * 4) + 2) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
+          kernel_shared[((int)threadIdx.x)] = kernel[(((((((int)blockIdx.x) * 147456) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((int)threadIdx.x) % 24) * 3)) + rx_outer_outer)];
+          if (((int)threadIdx.x) < 376) {
+            kernel_shared[(((int)threadIdx.x) + 392)] = kernel[((((((((int)blockIdx.x) * 147456) + (((((int)threadIdx.x) + 392) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
           }
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 3) % 9))) && ((((((int)threadIdx.x) * 4) + 3) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
-          }
-          kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
-          kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
-          kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
-          kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
-          kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
-          kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
-          kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
-          kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
-          kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
-          kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
-          kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
-          kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
-          kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
-          kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
-          kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
-          kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
           __syncthreads();
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          for (int rc_outer_inner = 0; rc_outer_inner < 4; ++rc_outer_inner) {
+            for (int ff_outer_inner = 0; ff_outer_inner < 4; ++ff_outer_inner) {
+              conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[((rc_outer_inner * 126) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6))]));
+              conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[(((rc_outer_inner * 126) + (((int)threadIdx.x) % 49)) + 7)] * kernel_shared[(((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6)) + 1)]));
+              conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[(((rc_outer_inner * 126) + (((int)threadIdx.x) % 49)) + 14)] * kernel_shared[(((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6)) + 2)]));
+              conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[(((rc_outer_inner * 126) + (((int)threadIdx.x) % 49)) + 63)] * kernel_shared[(((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6)) + 3)]));
+              conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[(((rc_outer_inner * 126) + (((int)threadIdx.x) % 49)) + 70)] * kernel_shared[(((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6)) + 4)]));
+              conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[(((rc_outer_inner * 126) + (((int)threadIdx.x) % 49)) + 77)] * kernel_shared[(((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6)) + 5)]));
+            }
+          }
         }
       }
-      for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
-        for (int i3_inner = 0; i3_inner < 7; ++i3_inner) {
-          compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
-        }
+      for (int i1_inner = 0; i1_inner < 4; ++i1_inner) {
+        compute[((((((int)blockIdx.x) * 1568) + ((((int)threadIdx.x) / 49) * 196)) + (i1_inner * 49)) + (((int)threadIdx.x) % 49))] = max((conv2d_nchw[i1_inner] + bias[(((((int)blockIdx.x) * 32) + ((((int)threadIdx.x) / 49) * 4)) + i1_inner)]), 0.000000e+00f);
       }
     }
 
@@ -1377,7 +547,7 @@ In the example below we resume the status and do more 5 trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  31.305 seconds)
+   **Total running time of the script:** ( 2 minutes  38.800 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index f405825bc..1836c6efa 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -646,7 +646,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-       9.9480       9.9365       9.9865       9.9211       0.0279   
+       9.6032       9.6115       9.6151       9.5830       0.0144   
                
 
 
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index 500a89e8e..36de7c6c1 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -665,7 +665,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      758.2846     758.0884     758.6797     758.0856      0.2794   
+      767.6131     768.6869     769.0931     765.0593      1.8134   
                
 
 
@@ -693,7 +693,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  20.772 seconds)
+   **Total running time of the script:** ( 1 minutes  22.662 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index 2c94269c7..231721066 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -396,76 +396,30 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
                  placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
       buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-      preflattened_buffer_map = {compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_7: placeholder_15: Buffer(placeholder_12, int32, [4916], []), placeholder_6: placeholder_16: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), placeholder_5: placeholder_18: Buffer(placeholder_10, float32, [128, 256], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], [])} {
-      for (i0.outer.i1.outer.fused: int32, 0, 32) "parallel" {
-        allocate(compute_4: Pointer(global float32), float32, [2048]), storage_scope = global {
-          for (nb_j.inner: int32, 0, 2) {
-            for (i.inner.init: int32, 0, 64) {
-              let cse_var_1: int32 = ((i.inner.init*32) + (nb_j.inner*16))
-               {
-                compute_5: Buffer(compute_4, float32, [2048], [])[cse_var_1] = 0f32
-                compute_5[(cse_var_1 + 1)] = 0f32
-                compute_5[(cse_var_1 + 2)] = 0f32
-                compute_5[(cse_var_1 + 3)] = 0f32
-                compute_5[(cse_var_1 + 4)] = 0f32
-                compute_5[(cse_var_1 + 5)] = 0f32
-                compute_5[(cse_var_1 + 6)] = 0f32
-                compute_5[(cse_var_1 + 7)] = 0f32
-                compute_5[(cse_var_1 + 8)] = 0f32
-                compute_5[(cse_var_1 + 9)] = 0f32
-                compute_5[(cse_var_1 + 10)] = 0f32
-                compute_5[(cse_var_1 + 11)] = 0f32
-                compute_5[(cse_var_1 + 12)] = 0f32
-                compute_5[(cse_var_1 + 13)] = 0f32
-                compute_5[(cse_var_1 + 14)] = 0f32
-                compute_5[(cse_var_1 + 15)] = 0f32
+      preflattened_buffer_map = {placeholder_5: placeholder_15: Buffer(placeholder_10, float32, [128, 256], []), placeholder_8: placeholder_16: Buffer(placeholder_13, int32, [33], []), placeholder_7: placeholder_17: Buffer(placeholder_12, int32, [4916], []), placeholder_9: placeholder_18: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_6: placeholder_19: Buffer(placeholder_11, float32, [4916, 16, 1], [])} {
+      for (i0.outer.i1.outer.fused: int32, 0, 256) "parallel" {
+        allocate(compute_4: Pointer(global float32), float32, [256]), storage_scope = global {
+          for (i.outer.inner: int32, 0, 2) {
+            for (i.inner.init: int32, 0, 8) {
+              for (j.init: int32, 0, 16) {
+                compute_5: Buffer(compute_4, float32, [256], [])[(((i.outer.inner*128) + (i.inner.init*16)) + j.init)] = 0f32
               }
             }
-            for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
-              for (i.inner: int32, 0, 64) {
-                let cse_var_21: int32 = (elem_idx*16)
-                let cse_var_20: int32 = ((i.inner*32) + (nb_j.inner*16))
-                let cse_var_19: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
-                let cse_var_18: int32 = ((floordiv(i0.outer.i1.outer.fused, 16)*16384) + (i.inner*256))
-                let cse_var_17: int32 = (cse_var_20 + 9)
-                let cse_var_16: int32 = (cse_var_20 + 8)
-                let cse_var_15: int32 = (cse_var_20 + 7)
-                let cse_var_14: int32 = (cse_var_20 + 6)
-                let cse_var_13: int32 = (cse_var_20 + 5)
-                let cse_var_12: int32 = (cse_var_20 + 4)
-                let cse_var_11: int32 = (cse_var_20 + 3)
-                let cse_var_10: int32 = (cse_var_20 + 2)
-                let cse_var_9: int32 = (cse_var_20 + 15)
-                let cse_var_8: int32 = (cse_var_20 + 14)
-                let cse_var_7: int32 = (cse_var_20 + 13)
-                let cse_var_6: int32 = (cse_var_20 + 12)
-                let cse_var_5: int32 = (cse_var_20 + 11)
-                let cse_var_4: int32 = (cse_var_20 + 10)
-                let cse_var_3: int32 = (cse_var_20 + 1)
-                 {
-                  compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[((placeholder_3[cse_var_19]*16) + cse_var_21)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-                  compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+            for (elem_idx: int32, 0, let cse_var_1: int32 = floormod(i0.outer.i1.outer.fused, 32) in (placeholder_3[(cse_var_1 + 1)] - placeholder_3[cse_var_1])) {
+              if let cse_var_2: int32 = floormod(i0.outer.i1.outer.fused, 32) in @tir.likely((elem_idx < (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])), dtype=bool) {
+                for (i.inner: int32, 0, 8) {
+                  for (j: int32, 0, 16) {
+                    let cse_var_4: int32 = floormod(i0.outer.i1.outer.fused, 32)
+                    let cse_var_3: int32 = (((i.outer.inner*128) + (i.inner*16)) + j)
+                    compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_4]*16) + (elem_idx*16)) + j)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*4096) + (i.outer.inner*2048)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_4] + elem_idx)])], 0f32)))
+                  }
                 }
               }
             }
           }
-          for (i0.inner: int32, 0, 64) {
-            let cse_var_22: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*32768) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
-            compute[ramp(cse_var_22, 1, 32)] = max((compute_5[ramp((i0.inner*32), 1, 32)] + placeholder_4[ramp(cse_var_22, 1, 32)]), broadcast(0f32, 32))
+          for (i0.inner: int32, 0, 16) {
+            let cse_var_5: int32 = (((floordiv(i0.outer.i1.outer.fused, 32)*8192) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 32)*16))
+            compute[ramp(cse_var_5, 1, 16)] = max((compute_5[ramp((i0.inner*16), 1, 16)] + placeholder_4[ramp(cse_var_5, 1, 16)]), broadcast(0f32, 16))
           }
         }
       }
@@ -521,7 +475,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 1.849 ms
+    Execution time of this operator: 1.564 ms
 
 
 
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index c2e072d59..a5f339e63 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,16 +5,16 @@
 
 Computation times
 =================
-**00:43.394** total execution time for **how_to_tune_with_autotvm** files:
+**00:44.228** total execution time for **how_to_tune_with_autotvm** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:43.362 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:44.198 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.019 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.016 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)             | 00:00.005 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)               | 00:00.004 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)               | 00:00.005 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``) | 00:00.004 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index 9d14a3999..435e4ed1b 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -879,8 +879,8 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 4, 32]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2885496
-    No: 6   GFLOPS: 100.64/100.64   result: MeasureResult(costs=(0.0023003807083333333,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6175487041473389, timestamp=1656027173.1325228)      [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
-    No: 7   GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 6   GFLOPS: 94.20/94.20     result: MeasureResult(costs=(0.002457435416666667,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.801192283630371, timestamp=1656051922.092587) [('tile_f', [-1, 1, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3754080
+    No: 7   GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1003,7 +1003,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 16, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 256, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6225319
-    No: 8   GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 8   GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1126,7 +1126,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 32]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,943546
-    No: 9   GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 9   GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1249,7 +1249,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 16, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 16, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2868708
-    No: 10  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 10  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
         res = future.result()
       File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
@@ -1267,7 +1267,7 @@ for this template
     TimeoutError
 
             [('tile_f', [-1, 32, 2, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4691833
-    No: 11  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 11  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1390,7 +1390,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 2, 64]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,1042124
-    No: 12  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 12  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1513,7 +1513,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 32, 16]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10013405
-    No: 13  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 13  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1636,7 +1636,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 8, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 4, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6732082
-    No: 14  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 14  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1759,7 +1759,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 4, 32]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7536735
-    No: 15  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 15  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1882,7 +1882,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 128, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,482121
-    No: 16  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 16  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2005,7 +2005,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 32, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2824525
-    No: 17  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 17  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2128,7 +2128,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4559286
-    No: 18  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 18  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -2251,7 +2251,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 32, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9677544
-    No: 19  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+    No: 19  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 738, in __call__
         yield remote, remote.load_module(os.path.split(build_result.filename)[1])
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 702, in run_through_rpc
@@ -2339,7 +2339,7 @@ for this template
       15: _PyEval_EvalFrameDefault
       14: 0x0000000000537c30
       13: _PyObject_FastCallKeywords
-      12: 0x00007fc50b4d8fa2
+      12: 0x00007f0129a2dfa2
       11: _ctypes_callproc
       10: ffi_call
       9: ffi_call_unix64
@@ -2404,7 +2404,7 @@ for this template
       21: _PyFunction_FastCallKeywords
       20: _PyEval_EvalFrameDefault
       19: _PyFunction_FastCall      [('tile_f', [-1, 8, 2, 16]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6390073
-    No: 20  GFLOPS: 142.17/142.17   result: MeasureResult(costs=(0.0016283737099999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4156231880187988, timestamp=1656027199.6258023)      [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
+    No: 20  GFLOPS: 144.37/144.37   result: MeasureResult(costs=(0.00160355996,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4382915496826172, timestamp=1656051948.7935243)      [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
 
 
 
@@ -2461,7 +2461,7 @@ and measure running time.
     Best config:
     [('tile_f', [-1, 1, 4, 1]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9881539
     Finish loading 20 records
-    Time cost of this operator: 0.001987
+    Time cost of this operator: 0.002021
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index 330c12777..297f33ed0 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -328,10 +328,10 @@ Timing the untuned program
     ########## Build without Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  
     ---------                                     ---                                           --------  -------  -----              ------  -------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  314.2     98.742   (1, 2, 10, 10, 3)  2       1        
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.085     0.969    (1, 6, 10, 10)     1       1        
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.917     0.288    (1, 1, 10, 10, 3)  1       1        
-    Total_time                                    -                                             318.202   -        -                  -       -        
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  313.8     98.72    (1, 2, 10, 10, 3)  2       1        
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.15      0.991    (1, 6, 10, 10)     1       1        
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.918     0.289    (1, 1, 10, 10, 3)  1       1        
+    Total_time                                    -                                             317.868   -        -                  -       -        
 
 
 
@@ -397,10 +397,10 @@ Timing the tuned program
     ########## Build with Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  
     ---------                                     ---                                           --------  -------  -----              ------  -------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  77.1      96.686   (1, 6, 10, 10, 1)  2       1        
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.741     2.184    (1, 6, 10, 10)     1       1        
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.901     1.13     (1, 1, 10, 10, 3)  1       1        
-    Total_time                                    -                                             79.742    -        -                  -       -        
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  247.5     98.827   (1, 1, 10, 10, 6)  2       1        
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       2.012     0.803    (1, 6, 10, 10)     1       1        
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.925     0.369    (1, 1, 10, 10, 3)  1       1        
+    Total_time                                    -                                             250.437   -        -                  -       -        
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
index 20c958a38..d40a7a3ee 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
@@ -225,7 +225,7 @@ take about **2 minutes** to download the Stanford Cars, while COCO 2017 validati
  .. code-block:: none
 
 
-    '/tmp/tmpucmdjl8m/images/random'
+    '/tmp/tmp43fbeicb/images/random'
 
 
 
@@ -325,8 +325,8 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
  .. code-block:: none
 
-    /tmp/tmpucmdjl8m/images/target contains 8144 images
-    /tmp/tmpucmdjl8m/images/random contains 5000 images
+    /tmp/tmp43fbeicb/images/target contains 8144 images
+    /tmp/tmp43fbeicb/images/random contains 5000 images
 
 
 
@@ -501,13 +501,13 @@ the time on our validation set).
  .. code-block:: none
 
     Epoch 1/3
-    328/328 - 55s - loss: 0.2270 - accuracy: 0.9237 - val_loss: 0.1414 - val_accuracy: 0.9596
+    328/328 - 56s - loss: 0.2080 - accuracy: 0.9267 - val_loss: 0.1195 - val_accuracy: 0.9603
     Epoch 2/3
-    328/328 - 52s - loss: 0.0998 - accuracy: 0.9615 - val_loss: 0.1108 - val_accuracy: 0.9641
+    328/328 - 53s - loss: 0.0932 - accuracy: 0.9637 - val_loss: 0.1037 - val_accuracy: 0.9653
     Epoch 3/3
-    328/328 - 52s - loss: 0.0668 - accuracy: 0.9755 - val_loss: 0.1075 - val_accuracy: 0.9656
+    328/328 - 53s - loss: 0.0592 - accuracy: 0.9782 - val_loss: 0.0993 - val_accuracy: 0.9660
 
-    <keras.callbacks.History object at 0x7f994ce79d90>
+    <keras.callbacks.History object at 0x7f31651d4450>
 
 
 
@@ -864,7 +864,7 @@ Arduino tutorial for how to do that `on GitHub <https://github.com/guberti/tvm-a
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 7 minutes  46.380 seconds)
+   **Total running time of the script:** ( 7 minutes  51.058 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_train.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index f11f1c83f..4dd0b72b4 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**08:33.417** total execution time for **how_to_work_with_microtvm** files:
+**08:40.588** total execution time for **how_to_work_with_microtvm** files:
 
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 07:46.380 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 07:51.058 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:43.514 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:45.746 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.523 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.784 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)             | 00:00.000 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index d36476c42..f8706ab43 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:10.088** total execution time for **how_to_work_with_relay** files:
+**00:11.521** total execution time for **how_to_work_with_relay** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``) | 00:08.287 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``) | 00:09.886 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                   | 00:01.795 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                   | 00:01.629 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)       | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
index 5c0df7400..812866410 100644
--- a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
@@ -259,7 +259,7 @@ The following example customizes CUDA lowering rule for :code:`exp`.
  .. code-block:: none
 
 
-    <function my_cuda_math_rule at 0x7f98bb001a70>
+    <function my_cuda_math_rule at 0x7f30e3bf1c20>
 
 
 
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index 2cf10ac6a..1e17866bb 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,22 +5,22 @@
 
 Computation times
 =================
-**00:04.038** total execution time for **how_to_work_with_schedules** files:
+**00:04.167** total execution time for **how_to_work_with_schedules** files:
 
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:01.886 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:01.952 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:00.945 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:00.971 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.524 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.542 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.511 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.525 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.098 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.102 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.034 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.036 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.028 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.027 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)               | 00:00.013 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index 7306702c5..8ab8618fa 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -346,7 +346,7 @@ The importing needs to happen before the tensorized GEMV being executed.
                  C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C}
       preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpde4lmcy4/input0.cc'\nsource_filename = \"/tmp/tmpde4lmcy4/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpweizmmus/input0.cc'\nsource_filename = \"/tmp/tmpweizmmus/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
       for (i, 0, 1024) {
         for (j.outer: int32, 0, 32) {
           @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index ec0b11455..f349c290f 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:20.882** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:22.076** total execution time for **topic_vta_tutorials_autotvm** files:
 
 +---------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:20.876 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:22.069 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)     | 00:00.006 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)     | 00:00.007 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index 40481b53a..245995cd3 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -291,7 +291,7 @@ The compilation steps are:
       DeprecationWarning,
     /workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the  new recommended usage.
       relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
-    resnet18_v1 inference graph built in 22.47s!
+    resnet18_v1 inference graph built in 23.39s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index c0c11e543..31e04bad7 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -335,7 +335,7 @@ The compilation steps are:
       "target_host parameter is going to be deprecated. "
     /workspace/python/tvm/relay/build_module.py:409: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
       DeprecationWarning,
-    yolov3-tiny inference graph built in 15.74s!
+    yolov3-tiny inference graph built in 16.58s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index da6f3f7b4..b634a87a0 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**01:30.677** total execution time for **topic_vta_tutorials_frontend** files:
+**01:31.234** total execution time for **topic_vta_tutorials_frontend** files:
 
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:47.765 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:47.989 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:42.912 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:43.245 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index 5f681f258..f076557e2 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:03.227** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.250** total execution time for **topic_vta_tutorials_optimize** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.836 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.855 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.391 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.395 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index 175998003..06f340606 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:00.707** total execution time for **topic_vta_tutorials** files:
+**00:00.712** total execution time for **topic_vta_tutorials** files:
 
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.377 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.382 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.331 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.329 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index 5559a2a6a..fcbd91114 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -327,7 +327,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 93.753 ms
+    Execution time of this operator: 96.056 ms
 
 
 
diff --git a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
index 8e26296ca..b32dd8831 100644
--- a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
@@ -449,16 +449,16 @@ reduce variance, we take 5 measurements and average them.
     waiting for device...
     device available
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 9.62/9.62       result: MeasureResult(costs=(0.0278975312,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5804417133331299, timestamp=1656026038.151792)        [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
-    No: 2   GFLOPS: 2.82/9.62       result: MeasureResult(costs=(0.0953100834,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6685631275177002, timestamp=1656026039.8406107)       [('tile_y', [-1, 4]), ('tile_x', [-1, 8])],None,32
-    No: 3   GFLOPS: 11.74/11.74     result: MeasureResult(costs=(0.0228682792,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5727012157440186, timestamp=1656026040.9035113)       [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
-    No: 4   GFLOPS: 1.42/11.74      result: MeasureResult(costs=(0.1886443622,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.1390581130981445, timestamp=1656026044.6096451)       [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
-    No: 5   GFLOPS: 3.61/11.74      result: MeasureResult(costs=(0.07443491960000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.333608865737915, timestamp=1656026046.0723867) [('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
-    No: 6   GFLOPS: 1.64/11.74      result: MeasureResult(costs=(0.164083069,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.7599570751190186, timestamp=1656026049.3952112)        [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
-    No: 7   GFLOPS: 0.85/11.74      result: MeasureResult(costs=(0.3150717742,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.153055191040039, timestamp=1656026054.598967) [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
-    No: 8   GFLOPS: 10.11/11.74     result: MeasureResult(costs=(0.0265527474,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5782184600830078, timestamp=1656026055.1860697)       [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
-    No: 9   GFLOPS: 1.69/11.74      result: MeasureResult(costs=(0.1585821982,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.6368064880371094, timestamp=1656026057.9431615)       [('tile_y', [-1, 2]), ('tile_x', [-1, 2])],None,11
-    No: 10  GFLOPS: 2.64/11.74      result: MeasureResult(costs=(0.1015259472,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7274973392486572, timestamp=1656026059.7293758)       [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
+    No: 1   GFLOPS: 10.67/10.67     result: MeasureResult(costs=(0.025156975999999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5476083755493164, timestamp=1656050773.366495)        [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
+    No: 2   GFLOPS: 2.30/10.67      result: MeasureResult(costs=(0.11648420799999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.009645700454712, timestamp=1656050775.947023)  [('tile_y', [-1, 4]), ('tile_x', [-1, 8])],None,32
+    No: 3   GFLOPS: 11.57/11.57     result: MeasureResult(costs=(0.0232001236,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6216588020324707, timestamp=1656050776.5265033)       [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
+    No: 4   GFLOPS: 1.75/11.57      result: MeasureResult(costs=(0.15298220440000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5736987590789795, timestamp=1656050779.678481) [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
+    No: 5   GFLOPS: 3.66/11.57      result: MeasureResult(costs=(0.07338461339999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3172941207885742, timestamp=1656050781.1258898)        [('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
+    No: 6   GFLOPS: 1.77/11.57      result: MeasureResult(costs=(0.15207398700000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5591161251068115, timestamp=1656050784.2676435)        [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
+    No: 7   GFLOPS: 0.87/11.57      result: MeasureResult(costs=(0.3097624102,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.080232381820679, timestamp=1656050789.3899856)        [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
+    No: 8   GFLOPS: 10.54/11.57     result: MeasureResult(costs=(0.025457384399999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5612666606903076, timestamp=1656050789.9642692)       [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
+    No: 9   GFLOPS: 1.90/11.57      result: MeasureResult(costs=(0.1414467218,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.3786518573760986, timestamp=1656050792.463337)        [('tile_y', [-1, 2]), ('tile_x', [-1, 2])],None,11
+    No: 10  GFLOPS: 2.78/11.57      result: MeasureResult(costs=(0.0964916106,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.665832757949829, timestamp=1656050794.172781) [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
 
 
 
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index 6e10e746b..7e98be428 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -314,7 +314,7 @@ standard deviation.
 
  .. code-block:: none
 
-    {'mean': 494.44040632999986, 'median': 494.373336249987, 'std': 0.548164183490144}
+    {'mean': 501.0085916000003, 'median': 500.789817750001, 'std': 1.0780751727340638}
 
 
 
@@ -550,31 +550,31 @@ the tuning data to.
 
     /workspace/python/tvm/driver/build_module.py:264: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.45/  17.45 GFLOPS | Progress: (4/20) | 6.23 s
    [Task  1/25]  Current/Best:    6.16/  17.45 GFLOPS | Progress: (8/20) | 9.11 s
    [Task  1/25]  Current/Best:   11.54/  22.66 GFLOPS | Progress: (12/20) | 11.61 s
    [Task  1/25]  Current/Best:   16.70/  22.67 GFLOPS | Progress: (16/20) | 13.30 s
    [Task  1/25]  Current/Best:   11.59/  23.84 GFLOPS | Progress: (20/20) | 15.04 s Done.
-
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.15/  13.19 GFLOPS | Progress: (4/20) | 3.80 s
    [Task  2/25]  Current/Best:   14.11/  18.76 GFLOPS | Progress: (8/20) | 5.10 s
    [Task  2/25]  Current/Best:   21.21/  21.21 GFLOPS | Progress: (12/20) | 6.41 s
    [Task  2/25]  Current/Best:   12.62/  21.21 GFLOPS | Progress: (16/20) | 7.67 s
    [Task  2/25]  Current/Best:   18.68/  21.21 GFLOPS | Progress: (20/20) | 9.27 s Done.
-
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.62/  10.59 GFLOPS | Progress: (4/20) | 5.85 s
    [Task  3/25]  Current/Best:   15.60/  16.87 GFLOPS | Progress: (8/20) | 7.77 s
    [Task  3/25]  Current/Best:   14.86/  16.87 GFLOPS | Progress: (12/20) | 9.48 s
    [Task  3/25]  Current/Best:    7.19/  23.81 GFLOPS | Progress: (16/20) | 11.41 s
    [Task  3/25]  Current/Best:   12.69/  23.81 GFLOPS | Progress: (20/20) | 16.00 s Done.
-
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.48/  19.69 GFLOPS | Progress: (4/20) | 2.38 s
    [Task  4/25]  Current/Best:    6.68/  19.69 GFLOPS | Progress: (8/20) | 7.15 s
    [Task  4/25]  Current/Best:   21.50/  21.50 GFLOPS | Progress: (12/20) | 12.08 s
    [Task  4/25]  Current/Best:   16.72/  21.50 GFLOPS | Progress: (16/20) | 14.52 s
    [Task  4/25]  Current/Best:   13.27/  21.50 GFLOPS | Progress: (20/20) | 16.50 s Done.
-
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.48/  10.16 GFLOPS | Progress: (4/20) | 2.59 s
    [Task  5/25]  Current/Best:   11.62/  12.62 GFLOPS | Progress: (8/20) | 4.65 s
    [Task  5/25]  Current/Best:   11.48/  18.04 GFLOPS | Progress: (12/20) | 7.85 s
    [Task  5/25]  Current/Best:   11.65/  22.74 GFLOPS | Progress: (16/20) | 9.26 s
    [Task  5/25]  Current/Best:   12.01/  22.74 GFLOPS | Progress: (20/20) | 11.13 s Done.
-
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.21/  20.74 GFLOPS | Progress: (4/20) | 4.11 s
    [Task  6/25]  Current/Best:   18.91/  20.74 GFLOPS | Progress: (8/20) | 5.87 s
    [Task  6/25]  Current/Best:   13.31/  20.74 GFLOPS | Progress: (12/20) | 7.83 s
    [Task  6/25]  Current/Best:   19.94/  20.74 GFLOPS | Progress: (16/20) | 10.10 s
    [Task  6/25]  Current/Best:    3.70/  20.74 GFLOPS | Progress: (20/20) | 12.61 s Done.
-
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   11.20/  12.97 GFLOPS | Progress: (4/20) | 3.62 s
    [Task  7/25]  Current/Best:   20.24/  20.89 GFLOPS | Progress: (8/20) | 5.11 s
    [Task  7/25]  Current/Best:   16.12/  20.90 GFLOPS | Progress: (12/20) | 7.01 s
    [Task  7/25]  Current/Best:   12.22/  20.90 GFLOPS | Progress: (16/20) | 9.06 s
    [Task  7/25]  Current/Best:    6.26/  21.78 GFLOPS | Progress: (20/20) | 11.53 s Done.
-
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:    9.66/  13.65 GFLOPS | Progress: (4/20) | 2.95 s
    [Task  8/25]  Current/Best:    9.02/  13.65 GFLOPS | Progress: (8/20) | 8.16 s
    [Task  8/25]  Current/Best:   12.49/  13.65 GFLOPS | Progress: (12/20) | 14.71 s
    [Task  8/25]  Current/Best:   18.86/  18.86 GFLOPS | Progress: (16/20) | 16.82 s
    [Task  8/25]  Current/Best:   18.90/  18.90 GFLOPS | Progress: (20/20) | 24.07 s Done.
-
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.26/  15.77 GFLOPS | Progress: (4/20) | 11.96 s
    [Task  9/25]  Current/Best:   23.29/  23.29 GFLOPS | Progress: (8/20) | 13.81 s
    [Task  9/25]  Current/Best:    8.26/  23.29 GFLOPS | Progress: (12/20) | 16.41 s
    [Task  9/25]  Current/Best:   17.85/  23.29 GFLOPS | Progress: (16/20) | 19.34 s
    [Task  9/25]  Current/Best:    9.08/  23.29 GFLOPS | Progress: (20/20) | 28.08 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.23/  18.23 GFLOPS | Progress: (4/20) | 2.61 s
    [Task 10/25]  Current/Best:   15.43/  18.23 GFLOPS | Progress: (8/20) | 4.26 s
    [Task 10/25]  Current/Best:   12.26/  18.81 GFLOPS | Progress: (12/20) | 5.82 s
    [Task 10/25]  Current/Best:   19.20/  20.05 GFLOPS | Progress: (16/20) | 6.93 s
    [Task 10/25]  Current/Best:    8.79/  20.05 GFLOPS | Progress: (20/20
 ) | 8.47 s Done.
-
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.33/  18.12 GFLOPS | Progress: (4/20) | 3.39 s
    [Task 11/25]  Current/Best:   16.94/  18.12 GFLOPS | Progress: (8/20) | 6.21 s
    [Task 11/25]  Current/Best:   18.24/  18.24 GFLOPS | Progress: (12/20) | 8.30 s
    [Task 11/25]  Current/Best:   13.32/  21.16 GFLOPS | Progress: (16/20) | 11.29 s
    [Task 11/25]  Current/Best:   19.49/  21.36 GFLOPS | Progress: (20/20) | 13.39 s Done.
-
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.80/  17.85 GFLOPS | Progress: (4/20) | 5.82 s
    [Task 12/25]  Current/Best:    5.01/  17.85 GFLOPS | Progress: (8/20) | 9.83 s
    [Task 12/25]  Current/Best:   18.77/  18.86 GFLOPS | Progress: (12/20) | 11.82 s
    [Task 12/25]  Current/Best:   15.16/  18.86 GFLOPS | Progress: (16/20) | 14.82 s
    [Task 12/25]  Current/Best:   15.09/  18.86 GFLOPS | Progress: (20/20) | 16.75 s Done.
-
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    9.00/  17.23 GFLOPS | Progress: (4/20) | 3.75 s
    [Task 13/25]  Current/Best:   16.08/  20.90 GFLOPS | Progress: (8/20) | 6.39 s
    [Task 13/25]  Current/Best:   19.51/  21.38 GFLOPS | Progress: (12/20) | 9.41 s
    [Task 13/25]  Current/Best:   12.19/  21.38 GFLOPS | Progress: (16/20) | 12.83 s
    [Task 13/25]  Current/Best:   18.48/  21.38 GFLOPS | Progress: (20/20) | 15.18 s Done.
-
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.58/  13.58 GFLOPS | Progress: (4/20) | 3.34 s
    [Task 14/25]  Current/Best:    6.10/  13.58 GFLOPS | Progress: (8/20) | 5.52 s
    [Task 14/25]  Current/Best:   20.49/  20.49 GFLOPS | Progress: (12/20) | 8.27 s
    [Task 14/25]  Current/Best:   16.75/  20.49 GFLOPS | Progress: (16/20) | 9.96 s Done.
-
    [Task 14/25]  Current/Best:   16.86/  20.49 GFLOPS | Progress: (20/20) | 11.69 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   16.15/  17.68 GFLOPS | Progress: (4/20) | 2.71 s
    [Task 15/25]  Current/Best:   12.89/  18.02 GFLOPS | Progress: (8/20) | 4.08 s
    [Task 15/25]  Current/Best:   10.35/  22.23 GFLOPS | Progress: (12/20) | 6.49 s
    [Task 15/25]  Current/Best:   20.42/  22.23 GFLOPS | Progress: (16/20) | 10.44 s
    [Task 15/25]  Current/Best:    9.70/  22.23 GFLOPS | Progress: (20/20) | 11.47 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.01/  20.01 GFLOPS | Progress: (4/20) | 3.03 s
    [Task 16/25]  Current/Best:    3.04/  20.01 GFLOPS | Progress: (8/20) | 4.66 s
    [Task 16/25]  Current/Best:   19.61/  20.01 GFLOPS | Progress: (12/20) | 5.88 s
    [Task 16/25]  Current/Best:   17.70/  20.01 GFLOPS | Progress: (16/20) 
 | 7.26 s
    [Task 16/25]  Current/Best:    9.98/  22.21 GFLOPS | Progress: (20/20) | 9.45 s Done.
-
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   12.69/  18.88 GFLOPS | Progress: (4/20) | 4.86 s
    [Task 17/25]  Current/Best:   14.47/  23.21 GFLOPS | Progress: (8/20) | 7.66 s
    [Task 17/25]  Current/Best:   16.91/  23.21 GFLOPS | Progress: (12/20) | 9.72 s
    [Task 17/25]  Current/Best:   16.55/  23.21 GFLOPS | Progress: (16/20) | 11.94 s
    [Task 17/25]  Current/Best:   10.03/  23.21 GFLOPS | Progress: (20/20) | 14.10 s Done.
-
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.33/  17.69 GFLOPS | Progress: (4/20) | 3.84 s
    [Task 18/25]  Current/Best:   10.53/  17.69 GFLOPS | Progress: (8/20) | 7.64 s
    [Task 18/25]  Current/Best:   19.53/  19.53 GFLOPS | Progress: (12/20) | 9.57 s
    [Task 18/25]  Current/Best:   10.19/  19.53 GFLOPS | Progress: (16/20) | 13.47 s
    [Task 18/25]  Current/Best:   20.57/  20.57 GFLOPS | Progress: (20/20) | 15.01 s Done.
-
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    7.13/  20.26 GFLOPS | Progress: (4/20) | 6.18 s
    [Task 19/25]  Current/Best:    2.60/  20.26 GFLOPS | Progress: (8/20) | 9.52 s
    [Task 19/25]  Current/Best:   18.39/  21.01 GFLOPS | Progress: (12/20) | 12.51 s
    [Task 19/25]  Current/Best:   14.95/  21.80 GFLOPS | Progress: (16/20) | 15.56 s
    [Task 19/25]  Current/Best:    2.70/  23.14 GFLOPS | Progress: (20/20) | 18.33 s Done.
-
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    8.77/  15.04 GFLOPS | Progress: (4/20) | 3.34 s Done.
+
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.38/  17.38 GFLOPS | Progress: (4/20) | 6.94 s
    [Task  1/25]  Current/Best:    6.16/  17.38 GFLOPS | Progress: (8/20) | 9.41 s
    [Task  1/25]  Current/Best:   11.49/  22.56 GFLOPS | Progress: (12/20) | 11.90 s
    [Task  1/25]  Current/Best:   16.68/  22.64 GFLOPS | Progress: (16/20) | 13.59 s
    [Task  1/25]  Current/Best:   11.57/  23.80 GFLOPS | Progress: (20/20) | 15.36 s Done.
+
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.10/  12.88 GFLOPS | Progress: (4/20) | 3.88 s
    [Task  2/25]  Current/Best:   14.01/  18.40 GFLOPS | Progress: (8/20) | 5.19 s
    [Task  2/25]  Current/Best:   20.85/  20.85 GFLOPS | Progress: (12/20) | 6.52 s
    [Task  2/25]  Current/Best:   12.33/  20.85 GFLOPS | Progress: (16/20) | 7.82 s
    [Task  2/25]  Current/Best:   19.28/  20.85 GFLOPS | Progress: (20/20) | 9.48 s Done.
+
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/  10.56 GFLOPS | Progress: (4/20) | 5.94 s
    [Task  3/25]  Current/Best:   15.50/  16.62 GFLOPS | Progress: (8/20) | 7.88 s
    [Task  3/25]  Current/Best:   14.77/  16.62 GFLOPS | Progress: (12/20) | 9.62 s
    [Task  3/25]  Current/Best:    7.19/  23.48 GFLOPS | Progress: (16/20) | 11.57 s
    [Task  3/25]  Current/Best:   12.48/  23.48 GFLOPS | Progress: (20/20) | 16.20 s Done.
+
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.49/  20.41 GFLOPS | Progress: (4/20) | 2.45 s
    [Task  4/25]  Current/Best:    6.66/  20.41 GFLOPS | Progress: (8/20) | 7.29 s
    [Task  4/25]  Current/Best:   21.21/  21.21 GFLOPS | Progress: (12/20) | 12.39 s
    [Task  4/25]  Current/Best:   17.07/  21.21 GFLOPS | Progress: (16/20) | 14.85 s
    [Task  4/25]  Current/Best:   13.30/  21.21 GFLOPS | Progress: (20/20) | 16.93 s Done.
+
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.06/  10.03 GFLOPS | Progress: (4/20) | 2.67 s
    [Task  5/25]  Current/Best:   11.08/  11.96 GFLOPS | Progress: (8/20) | 4.81 s
    [Task  5/25]  Current/Best:   10.03/  17.75 GFLOPS | Progress: (12/20) | 8.09 s
    [Task  5/25]  Current/Best:   11.11/  21.93 GFLOPS | Progress: (16/20) | 9.53 s
    [Task  5/25]  Current/Best:   11.56/  21.93 GFLOPS | Progress: (20/20) | 11.51 s Done.
+
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.22/  20.68 GFLOPS | Progress: (4/20) | 4.22 s
    [Task  6/25]  Current/Best:   18.82/  20.68 GFLOPS | Progress: (8/20) | 6.00 s
    [Task  6/25]  Current/Best:   12.79/  20.68 GFLOPS | Progress: (12/20) | 7.99 s
    [Task  6/25]  Current/Best:   19.66/  20.68 GFLOPS | Progress: (16/20) | 10.29 s
    [Task  6/25]  Current/Best:    3.64/  20.68 GFLOPS | Progress: (20/20) | 12.83 s Done.
+
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   10.13/  12.13 GFLOPS | Progress: (4/20) | 3.79 s
    [Task  7/25]  Current/Best:   19.99/  20.89 GFLOPS | Progress: (8/20) | 5.32 s
    [Task  7/25]  Current/Best:   15.67/  20.89 GFLOPS | Progress: (12/20) | 7.26 s
    [Task  7/25]  Current/Best:   12.20/  20.89 GFLOPS | Progress: (16/20) | 9.35 s
    [Task  7/25]  Current/Best:    6.32/  21.56 GFLOPS | Progress: (20/20) | 11.85 s Done.
+
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:    9.56/  14.51 GFLOPS | Progress: (4/20) | 2.98 s
    [Task  8/25]  Current/Best:    9.31/  14.51 GFLOPS | Progress: (8/20) | 8.28 s
    [Task  8/25]  Current/Best:   12.67/  14.51 GFLOPS | Progress: (12/20) | 14.97 s
    [Task  8/25]  Current/Best:   18.91/  18.91 GFLOPS | Progress: (16/20) | 17.06 s
    [Task  8/25]  Current/Best:   19.79/  19.79 GFLOPS | Progress: (20/20) | 24.28 s Done.
+
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.24/  15.66 GFLOPS | Progress: (4/20) | 12.00 s
    [Task  9/25]  Current/Best:   23.28/  23.28 GFLOPS | Progress: (8/20) | 13.80 s
    [Task  9/25]  Current/Best:    8.24/  23.28 GFLOPS | Progress: (12/20) | 16.33 s
    [Task  9/25]  Current/Best:   17.73/  23.28 GFLOPS | Progress: (16/20) | 19.22 s
    [Task  9/25]  Current/Best:    8.95/  23.28 GFLOPS | Progress: (20/20) | 27.97 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.14/  18.14 GFLOPS | Progress: (4/20) | 2.61 s
    [Task 10/25]  Current/Best:   15.52/  18.14 GFLOPS | Progress: (8/20) | 4.32 s
    [Task 10/25]  Current/Best:   12.14/  19.06 GFLOPS | Progress: (12/20) | 5.88 s
    [Task 10/25]  Current/Best:   19.04/  20.28 GFLOPS | Progress: (16/20) | 7.01 s
    [Task 10/25]  Current/Best:    8.95/  20.28 GFLOPS | Progress: (20/20
 ) | 8.59 s Done.
+
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.19/  18.07 GFLOPS | Progress: (4/20) | 3.44 s
    [Task 11/25]  Current/Best:   16.77/  18.07 GFLOPS | Progress: (8/20) | 6.27 s
    [Task 11/25]  Current/Best:   18.05/  18.07 GFLOPS | Progress: (12/20) | 8.38 s
    [Task 11/25]  Current/Best:   10.41/  21.25 GFLOPS | Progress: (16/20) | 11.34 s
    [Task 11/25]  Current/Best:   19.39/  21.54 GFLOPS | Progress: (20/20) | 13.47 s Done.
+
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.75/  18.08 GFLOPS | Progress: (4/20) | 5.88 s
    [Task 12/25]  Current/Best:    5.06/  18.08 GFLOPS | Progress: (8/20) | 9.95 s
    [Task 12/25]  Current/Best:   19.22/  19.22 GFLOPS | Progress: (12/20) | 11.96 s
    [Task 12/25]  Current/Best:   13.06/  19.22 GFLOPS | Progress: (16/20) | 14.93 s
    [Task 12/25]  Current/Best:   15.08/  19.22 GFLOPS | Progress: (20/20) | 16.87 s Done.
+
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.83/  17.19 GFLOPS | Progress: (4/20) | 3.85 s
    [Task 13/25]  Current/Best:   15.70/  20.60 GFLOPS | Progress: (8/20) | 6.52 s
    [Task 13/25]  Current/Best:   19.37/  21.51 GFLOPS | Progress: (12/20) | 9.68 s
    [Task 13/25]  Current/Best:   12.19/  21.51 GFLOPS | Progress: (16/20) | 13.23 s
    [Task 13/25]  Current/Best:   18.48/  21.51 GFLOPS | Progress: (20/20) | 15.58 s Done.
+
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   13.68/  13.68 GFLOPS | Progress: (4/20) | 3.43 s
    [Task 14/25]  Current/Best:    6.07/  13.68 GFLOPS | Progress: (8/20) | 5.63 s
    [Task 14/25]  Current/Best:   20.28/  20.28 GFLOPS | Progress: (12/20) | 8.34 s
    [Task 14/25]  Current/Best:   17.09/  20.28 GFLOPS | Progress: (16/20) | 10.02 s Done.
+
    [Task 14/25]  Current/Best:   15.30/  20.28 GFLOPS | Progress: (20/20) | 11.80 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   16.01/  17.40 GFLOPS | Progress: (4/20) | 2.83 s
    [Task 15/25]  Current/Best:   14.32/  17.73 GFLOPS | Progress: (8/20) | 4.21 s
    [Task 15/25]  Current/Best:   10.32/  22.01 GFLOPS | Progress: (12/20) | 6.58 s
    [Task 15/25]  Current/Best:   20.18/  22.01 GFLOPS | Progress: (16/20) | 9.81 s
    [Task 15/25]  Current/Best:    9.67/  22.01 GFLOPS | Progress: (20/20) | 10.84 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   20.53/  20.53 GFLOPS | Progress: (4/20) | 3.06 s
    [Task 16/25]  Current/Best:    3.03/  20.53 GFLOPS | Progress: (8/20) | 4.69 s
    [Task 16/25]  Current/Best:   19.11/  20.53 GFLOPS | Progress: (12/20) | 5.92 s
    [Task 16/25]  Current/Best:   17.14/  20.53 GFLOPS | Progress: (16/20) |
  7.29 s
    [Task 16/25]  Current/Best:    9.94/  20.53 GFLOPS | Progress: (20/20) | 9.50 s Done.
+
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   14.04/  18.74 GFLOPS | Progress: (4/20) | 4.85 s
    [Task 17/25]  Current/Best:   14.38/  22.98 GFLOPS | Progress: (8/20) | 7.80 s
    [Task 17/25]  Current/Best:   16.74/  22.98 GFLOPS | Progress: (12/20) | 9.85 s
    [Task 17/25]  Current/Best:   16.52/  22.98 GFLOPS | Progress: (16/20) | 12.12 s
    [Task 17/25]  Current/Best:   10.00/  22.98 GFLOPS | Progress: (20/20) | 14.30 s Done.
+
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.27/  17.25 GFLOPS | Progress: (4/20) | 3.92 s
    [Task 18/25]  Current/Best:    9.87/  19.60 GFLOPS | Progress: (8/20) | 7.74 s
    [Task 18/25]  Current/Best:   19.02/  19.60 GFLOPS | Progress: (12/20) | 9.74 s
    [Task 18/25]  Current/Best:    9.83/  19.60 GFLOPS | Progress: (16/20) | 13.73 s
    [Task 18/25]  Current/Best:   20.39/  20.39 GFLOPS | Progress: (20/20) | 15.25 s Done.
+
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    6.08/  20.07 GFLOPS | Progress: (4/20) | 6.46 s
    [Task 19/25]  Current/Best:    2.60/  20.07 GFLOPS | Progress: (8/20) | 9.88 s
    [Task 19/25]  Current/Best:   18.95/  20.66 GFLOPS | Progress: (12/20) | 12.93 s
    [Task 19/25]  Current/Best:   15.12/  20.66 GFLOPS | Progress: (16/20) | 16.00 s
    [Task 19/25]  Current/Best:    2.70/  22.96 GFLOPS | Progress: (20/20) | 18.83 s Done.
+
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    9.87/  14.90 GFLOPS | Progress: (4/20) | 3.44 s Done.
      Done.
-
    [Task 20/25]  Current/Best:    9.51/  15.04 GFLOPS | Progress: (8/20) | 6.89 s
    [Task 20/25]  Current/Best:    2.32/  16.45 GFLOPS | Progress: (12/20) | 10.81 s
    [Task 20/25]  Current/Best:   12.50/  16.45 GFLOPS | Progress: (16/20) | 14.66 s
    [Task 20/25]  Current/Best:   12.17/  21.95 GFLOPS | Progress: (20/20) | 16.78 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.41/  17.69 GFLOPS | Progress: (4/20) | 3.28 s
    [Task 21/25]  Current/Best:   14.61/  17.69 GFLOPS | Progress: (8/20) | 4.91 s
    [Task 21/25]  Current/Best:    1.61/  17.69 GFLOPS | Progress: (12/20) | 7.04 s
    [Task 21/25]  Current/Best:   18.27/  18.27 GFLOPS | Progress: (16/20) | 10.57 s
    [Task 21/25]  Current/Best:    4.47/  18.27 GFLOPS | Progress: (20/20) | 17.97 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.70/  17.03 GFLOPS | Progress: (4/20
 ) | 2.67 s
    [Task 22/25]  Current/Best:    8.62/  22.00 GFLOPS | Progress: (8/20) | 4.71 s
    [Task 22/25]  Current/Best:   19.89/  22.00 GFLOPS | Progress: (12/20) | 7.10 s
    [Task 22/25]  Current/Best:   15.12/  22.00 GFLOPS | Progress: (16/20) | 9.26 s
    [Task 22/25]  Current/Best:   14.97/  22.00 GFLOPS | Progress: (20/20) | 10.99 s Done.
-
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.49/  20.69 GFLOPS | Progress: (4/20) | 3.25 s
    [Task 23/25]  Current/Best:   15.38/  20.69 GFLOPS | Progress: (8/20) | 6.62 s
    [Task 23/25]  Current/Best:   20.78/  21.55 GFLOPS | Progress: (12/20) | 8.48 s
    [Task 23/25]  Current/Best:    6.38/  21.55 GFLOPS | Progress: (16/20) | 15.64 s
    [Task 23/25]  Current/Best:    7.80/  21.55 GFLOPS | Progress: (20/20) | 19.87 s Done.
-
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.60/   8.60 GFLOPS | Progress: (4/20) | 11.80 s
    [Task 24/25]  Current/Best:    2.11/   8.60 GFLOPS | Progress: (8/20) | 22.86 s
    [Task 24/25]  Current/Best:    4.42/   8.60 GFLOPS | Progress: (12/20) | 34.39 s Done.
+
    [Task 20/25]  Current/Best:   10.08/  14.90 GFLOPS | Progress: (8/20) | 6.90 s
    [Task 20/25]  Current/Best:    2.32/  16.72 GFLOPS | Progress: (12/20) | 10.91 s
    [Task 20/25]  Current/Best:   12.24/  16.72 GFLOPS | Progress: (16/20) | 14.92 s
    [Task 20/25]  Current/Best:   13.25/  21.48 GFLOPS | Progress: (20/20) | 17.06 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.37/  17.51 GFLOPS | Progress: (4/20) | 3.37 s
    [Task 21/25]  Current/Best:   14.32/  17.51 GFLOPS | Progress: (8/20) | 4.99 s
    [Task 21/25]  Current/Best:    1.61/  17.51 GFLOPS | Progress: (12/20) | 7.14 s
    [Task 21/25]  Current/Best:   18.11/  18.11 GFLOPS | Progress: (16/20) | 10.79 s
    [Task 21/25]  Current/Best:    4.45/  18.11 GFLOPS | Progress: (20/20) | 18.50 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.70/  16.88 GFLOPS | Progress: (4/20
 ) | 2.76 s
    [Task 22/25]  Current/Best:    9.11/  21.23 GFLOPS | Progress: (8/20) | 4.76 s
    [Task 22/25]  Current/Best:   19.51/  21.23 GFLOPS | Progress: (12/20) | 7.23 s
    [Task 22/25]  Current/Best:   15.06/  21.23 GFLOPS | Progress: (16/20) | 9.38 s
    [Task 22/25]  Current/Best:   14.96/  21.23 GFLOPS | Progress: (20/20) | 11.15 s Done.
+
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   17.27/  20.12 GFLOPS | Progress: (4/20) | 3.34 s
    [Task 23/25]  Current/Best:   15.55/  20.12 GFLOPS | Progress: (8/20) | 6.86 s
    [Task 23/25]  Current/Best:   20.68/  20.87 GFLOPS | Progress: (12/20) | 8.75 s
    [Task 23/25]  Current/Best:    5.27/  20.87 GFLOPS | Progress: (16/20) | 16.26 s
    [Task 23/25]  Current/Best:    7.16/  20.87 GFLOPS | Progress: (20/20) | 20.61 s Done.
+
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.40/   8.40 GFLOPS | Progress: (4/20) | 11.85 s
    [Task 24/25]  Current/Best:    2.93/   8.40 GFLOPS | Progress: (8/20) | 23.13 s
    [Task 24/25]  Current/Best:    3.74/   8.40 GFLOPS | Progress: (12/20) | 34.09 s Done.
      Done.
-
    [Task 24/25]  Current/Best:    6.84/   8.60 GFLOPS | Progress: (16/20) | 40.25 s
    [Task 24/25]  Current/Best:    3.27/   8.88 GFLOPS | Progress: (20/20) | 46.41 s Done.
-
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.55/   2.72 GFLOPS | Progress: (4/20) | 11.60 s
    [Task 25/25]  Current/Best:    5.24/   7.97 GFLOPS | Progress: (8/20) | 22.86 s
    [Task 25/25]  Current/Best:    5.92/   7.97 GFLOPS | Progress: (12/20) | 34.32 s
    [Task 25/25]  Current/Best:    5.85/   9.22 GFLOPS | Progress: (16/20) | 36.12 s
    [Task 25/25]  Current/Best:    2.90/   9.22 GFLOPS | Progress: (20/20) | 46.81 s
+
    [Task 24/25]  Current/Best:    6.98/   8.40 GFLOPS | Progress: (16/20) | 40.15 s
    [Task 24/25]  Current/Best:    3.11/   8.54 GFLOPS | Progress: (20/20) | 46.60 s Done.
+
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.54/   2.79 GFLOPS | Progress: (4/20) | 11.67 s
    [Task 25/25]  Current/Best:    5.23/   6.87 GFLOPS | Progress: (8/20) | 22.97 s
    [Task 25/25]  Current/Best:    5.62/   6.87 GFLOPS | Progress: (12/20) | 34.29 s
    [Task 25/25]  Current/Best:    5.48/   7.73 GFLOPS | Progress: (16/20) | 36.09 s
    [Task 25/25]  Current/Best:    2.71/   8.43 GFLOPS | Progress: (20/20) | 46.79 s
 
 
 
@@ -735,8 +735,8 @@ improvement in comparing the optimized model to the unoptimized model.
 
  .. code-block:: none
 
-    optimized: {'mean': 414.51848463000715, 'median': 414.29720275000363, 'std': 0.9028848814414562}
-    unoptimized: {'mean': 494.44040632999986, 'median': 494.373336249987, 'std': 0.548164183490144}
+    optimized: {'mean': 420.60280053999804, 'median': 420.85482149999507, 'std': 1.3873905097211576}
+    unoptimized: {'mean': 501.0085916000003, 'median': 500.789817750001, 'std': 1.0780751727340638}
 
 
 
@@ -759,7 +759,7 @@ profiling/benchmarking.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 10 minutes  27.309 seconds)
+   **Total running time of the script:** ( 10 minutes  37.666 seconds)
 
 
 .. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index 6f715036b..83dcae0ef 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -269,7 +269,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.322e-07 secs/op
+    1.301e-07 secs/op
 
 
 
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index d5b2f100a..98fb8029e 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -262,7 +262,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, placeholder(a, 0xdaa5250)), stage(b, placeholder(b, 0xd37e180)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min= [...]
+    [stage(a, placeholder(a, 0xef0ee20)), stage(b, placeholder(b, 0x277f1990)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min [...]
 
 
 
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index d6628d635..857722afa 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,30 +5,30 @@
 
 Computation times
 =================
-**13:20.860** total execution time for **tutorial** files:
+**13:17.802** total execution time for **tutorial** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 10:27.309 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 10:37.666 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 01:02.747 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 01:03.144 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 00:56.090 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 00:41.989 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:28.274 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:28.583 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:25.094 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:24.399 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.675 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:01.185 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:00.515 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.686 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.156 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.149 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)                             | 00:00.000 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.000 | 0.0 MB |
-+------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)   | 00:00.000 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.000 | 0.0 MB |
++------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_install.py` (``install.py``)                                     | 00:00.000 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index 45758339e..6033437c5 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -288,7 +288,7 @@ helper function to run a profile of the TVM generated code.
 
  .. code-block:: none
 
-    Numpy running time: 0.000008
+    Numpy running time: 0.000009
     naive: 0.000006
 
 
@@ -390,7 +390,7 @@ compile and run this new schedule with the parallel operation applied:
 
     /workspace/python/tvm/driver/build_module.py:264: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    parallel: 0.000007
+    parallel: 0.000006
 
 
 
@@ -499,10 +499,10 @@ We can now compare the different schedules
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                   numpy    8.496479999848816e-06                    1.0
-                   naive               5.848e-06      0.6882850309897813
-                parallel               6.928e-06      0.8153964936212732
-                  vector             2.45464e-05      2.8890081540163424
+                   numpy    9.10881999971025e-06                     1.0
+                   naive    5.9355999999999995e-06    0.6516321543502682
+                parallel              6.0836e-06      0.6678801425644065
+                  vector             2.46047e-05       2.701195105489259
 
 
 
@@ -923,7 +923,7 @@ matrix multiplication.
 
  .. code-block:: none
 
-    Numpy running time: 0.018663
+    Numpy running time: 0.020028
 
 
 
@@ -983,7 +983,7 @@ optimizations.
 
     /workspace/python/tvm/driver/build_module.py:264: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    none: 3.536849
+    none: 3.520693
 
 
 
@@ -1088,7 +1088,7 @@ schedule.
 
     /workspace/python/tvm/driver/build_module.py:264: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    blocking: 0.310639
+    blocking: 0.328021
 
 
 
@@ -1186,7 +1186,7 @@ already cache friendly from our previous optimizations.
 
     /workspace/python/tvm/driver/build_module.py:264: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    vectorization: 0.342384
+    vectorization: 0.350832
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1262,7 +1262,7 @@ more cache friendly.
 
     /workspace/python/tvm/driver/build_module.py:264: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    loop permutation: 0.117301
+    loop permutation: 0.130305
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1363,7 +1363,7 @@ optimized schedule.
 
     /workspace/python/tvm/driver/build_module.py:264: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    array packing: 0.109914
+    array packing: 0.110495
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1458,7 +1458,7 @@ to `C` when all the block results are ready.
 
     /workspace/python/tvm/driver/build_module.py:264: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    block caching: 0.111129
+    block caching: 0.111820
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1546,7 +1546,7 @@ of thread-level parallelization.
 
     /workspace/python/tvm/driver/build_module.py:264: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    parallelization: 0.145001
+    parallelization: 0.146658
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1627,13 +1627,13 @@ working, we can compare the results.
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                    none            3.5368489374                     1.0
-                blocking     0.31063883130000003     0.08782926180849446
-           vectorization            0.3423837228     0.09680473462677554
-        loop permutation            0.1173012078     0.03316545599661098
-           array packing            0.1099138028    0.031076759212905366
-           block caching            0.1111293247     0.03142043289575527
-         parallelization            0.1450009952     0.04099722599591511
+                    none      3.5206934830999996                     1.0
+                blocking     0.32802107999999996      0.0931694512954802
+           vectorization     0.35083211799999997     0.09964858334986021
+        loop permutation     0.13030489569999998      0.0370111446297408
+           array packing            0.1104951354     0.03138448033900075
+           block caching            0.1118197683     0.03176072237948467
+         parallelization            0.1466579804     0.04165599223675287
 
 
 
@@ -1675,7 +1675,7 @@ the computation for specific platforms.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  2.747 seconds)
+   **Total running time of the script:** ( 1 minutes  3.144 seconds)
 
 
 .. _sphx_glr_download_tutorial_tensor_expr_get_started.py:
diff --git a/docs/commit_hash b/docs/commit_hash
index 6bf1e7fad..c00170c19 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-b4c0bf7419950c99f8a995042aa547dc30187f4b
+d2cbdf381b68134951bfd7525c6a3a67838e5bdf
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index 52c99e3c6..42afe3d0f 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -422,7 +422,7 @@ to download the full example code</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;x&quot;</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">x</span><span class="o">.</span><span class="n">shape</span></a><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipa032f37d-d3ba-49fe-9bf4-ee085ae5cd9d from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipbd6d80df-b392-4d2e-bfba-6feeb2255041 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
 x (1, 3, 224, 224)
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index 51b717a58..bc54b886a 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -427,42 +427,43 @@ python3 -m pip install -f https://release.oneflow.info <span class="nv">oneflow<
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip&quot; to /workspace/.oneflow/flowvision_cache/resnet18.zip
 
   0%|          | 0.00/41.5M [00:00&lt;?, ?B/s]
-  0%|          | 16.0k/41.5M [00:00&lt;07:38, 94.9kB/s]
-  0%|          | 48.0k/41.5M [00:00&lt;04:49, 150kB/s]
-  0%|          | 96.0k/41.5M [00:00&lt;03:25, 211kB/s]
-  0%|          | 200k/41.5M [00:00&lt;01:57, 370kB/s]
-  1%|          | 408k/41.5M [00:00&lt;01:03, 679kB/s]
-  2%|1         | 832k/41.5M [00:01&lt;00:32, 1.30MB/s]
-  4%|3         | 1.64M/41.5M [00:01&lt;00:16, 2.51MB/s]
-  7%|7         | 3.11M/41.5M [00:01&lt;00:08, 4.54MB/s]
- 11%|#1        | 4.59M/41.5M [00:01&lt;00:06, 5.90MB/s]
- 15%|#4        | 6.05M/41.5M [00:01&lt;00:05, 6.82MB/s]
- 18%|#8        | 7.52M/41.5M [00:01&lt;00:04, 7.45MB/s]
- 22%|##1       | 9.00M/41.5M [00:02&lt;00:04, 7.89MB/s]
- 25%|##5       | 10.5M/41.5M [00:02&lt;00:03, 8.19MB/s]
- 29%|##8       | 11.9M/41.5M [00:02&lt;00:03, 8.40MB/s]
- 32%|###2      | 13.4M/41.5M [00:02&lt;00:03, 8.54MB/s]
- 36%|###5      | 14.9M/41.5M [00:02&lt;00:03, 8.64MB/s]
- 39%|###9      | 16.3M/41.5M [00:02&lt;00:03, 8.71MB/s]
- 43%|####2     | 17.8M/41.5M [00:03&lt;00:02, 8.77MB/s]
- 46%|####6     | 19.3M/41.5M [00:03&lt;00:02, 8.80MB/s]
- 50%|#####     | 20.8M/41.5M [00:03&lt;00:02, 8.82MB/s]
- 54%|#####3    | 22.2M/41.5M [00:03&lt;00:02, 8.84MB/s]
- 57%|#####7    | 23.7M/41.5M [00:03&lt;00:02, 8.85MB/s]
- 61%|######    | 25.2M/41.5M [00:03&lt;00:01, 8.85MB/s]
- 64%|######4   | 26.6M/41.5M [00:04&lt;00:01, 8.86MB/s]
- 68%|######7   | 28.1M/41.5M [00:04&lt;00:01, 8.87MB/s]
- 71%|#######1  | 29.6M/41.5M [00:04&lt;00:01, 10.0MB/s]
- 74%|#######4  | 30.8M/41.5M [00:04&lt;00:01, 10.6MB/s]
- 77%|#######6  | 31.9M/41.5M [00:04&lt;00:01, 9.68MB/s]
- 79%|#######9  | 32.9M/41.5M [00:04&lt;00:01, 8.44MB/s]
- 82%|########1 | 34.0M/41.5M [00:05&lt;00:00, 7.96MB/s]
- 85%|########5 | 35.4M/41.5M [00:05&lt;00:00, 8.26MB/s]
- 89%|########8 | 36.9M/41.5M [00:05&lt;00:00, 8.46MB/s]
- 93%|#########2| 38.4M/41.5M [00:05&lt;00:00, 8.58MB/s]
- 96%|#########6| 39.9M/41.5M [00:05&lt;00:00, 8.67MB/s]
-100%|#########9| 41.3M/41.5M [00:05&lt;00:00, 8.74MB/s]
-100%|##########| 41.5M/41.5M [00:05&lt;00:00, 7.37MB/s]
+  0%|          | 16.0k/41.5M [00:00&lt;08:27, 85.6kB/s]
+  0%|          | 48.0k/41.5M [00:00&lt;05:20, 136kB/s]
+  0%|          | 96.0k/41.5M [00:00&lt;03:48, 190kB/s]
+  0%|          | 160k/41.5M [00:00&lt;02:53, 250kB/s]
+  1%|          | 272k/41.5M [00:00&lt;01:55, 375kB/s]
+  1%|1         | 520k/41.5M [00:01&lt;01:01, 696kB/s]
+  2%|2         | 0.99M/41.5M [00:01&lt;00:31, 1.33MB/s]
+  5%|4         | 1.98M/41.5M [00:01&lt;00:15, 2.63MB/s]
+  8%|8         | 3.48M/41.5M [00:01&lt;00:09, 4.35MB/s]
+ 12%|#1        | 4.95M/41.5M [00:01&lt;00:06, 5.49MB/s]
+ 16%|#5        | 6.45M/41.5M [00:02&lt;00:05, 6.31MB/s]
+ 19%|#9        | 7.95M/41.5M [00:02&lt;00:05, 6.86MB/s]
+ 23%|##2       | 9.44M/41.5M [00:02&lt;00:04, 7.25MB/s]
+ 26%|##6       | 10.9M/41.5M [00:02&lt;00:04, 7.52MB/s]
+ 30%|##9       | 12.4M/41.5M [00:02&lt;00:03, 7.70MB/s]
+ 34%|###3      | 13.9M/41.5M [00:03&lt;00:03, 7.83MB/s]
+ 37%|###7      | 15.4M/41.5M [00:03&lt;00:03, 7.92MB/s]
+ 41%|####      | 16.9M/41.5M [00:03&lt;00:03, 7.99MB/s]
+ 44%|####4     | 18.4M/41.5M [00:03&lt;00:03, 8.03MB/s]
+ 48%|####7     | 19.9M/41.5M [00:03&lt;00:02, 8.06MB/s]
+ 52%|#####1    | 21.4M/41.5M [00:04&lt;00:02, 8.09MB/s]
+ 55%|#####5    | 22.9M/41.5M [00:04&lt;00:02, 8.82MB/s]
+ 59%|#####8    | 24.3M/41.5M [00:04&lt;00:01, 9.86MB/s]
+ 61%|######1   | 25.3M/41.5M [00:04&lt;00:01, 9.15MB/s]
+ 63%|######3   | 26.2M/41.5M [00:04&lt;00:02, 7.74MB/s]
+ 66%|######5   | 27.3M/41.5M [00:04&lt;00:01, 7.98MB/s]
+ 69%|######9   | 28.8M/41.5M [00:04&lt;00:01, 9.32MB/s]
+ 72%|#######1  | 29.7M/41.5M [00:05&lt;00:01, 8.59MB/s]
+ 74%|#######3  | 30.6M/41.5M [00:05&lt;00:01, 7.25MB/s]
+ 77%|#######6  | 31.8M/41.5M [00:05&lt;00:01, 7.08MB/s]
+ 80%|########  | 33.3M/41.5M [00:05&lt;00:01, 7.43MB/s]
+ 84%|########3 | 34.8M/41.5M [00:05&lt;00:00, 7.65MB/s]
+ 87%|########7 | 36.3M/41.5M [00:05&lt;00:00, 7.80MB/s]
+ 91%|#########1| 37.8M/41.5M [00:06&lt;00:00, 7.91MB/s]
+ 95%|#########4| 39.3M/41.5M [00:06&lt;00:00, 7.96MB/s]
+ 98%|#########8| 40.8M/41.5M [00:06&lt;00:00, 8.01MB/s]
+100%|##########| 41.5M/41.5M [00:06&lt;00:00, 6.64MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_paddle.html b/docs/how_to/compile_models/from_paddle.html
index b56574f36..f4bf49600 100644
--- a/docs/how_to/compile_models/from_paddle.html
+++ b/docs/how_to/compile_models/from_paddle.html
@@ -488,7 +488,7 @@ A quick solution is</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>TVM prediction top-1 id: 282, class name:  282: &#39;tiger cat&#39;,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  7.018 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  7.415 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-paddle-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/16269b77359771348d507395692524cf/from_paddle.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_paddle.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_pytorch.html b/docs/how_to/compile_models/from_pytorch.html
index 13d14d583..b0541dbd5 100644
--- a/docs/how_to/compile_models/from_pytorch.html
+++ b/docs/how_to/compile_models/from_pytorch.html
@@ -409,9 +409,14 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/resnet18-f37072fd.pth&quot; to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
 
   0%|          | 0.00/44.7M [00:00&lt;?, ?B/s]
- 43%|####2     | 19.0M/44.7M [00:00&lt;00:00, 200MB/s]
- 94%|#########4| 42.1M/44.7M [00:00&lt;00:00, 225MB/s]
-100%|##########| 44.7M/44.7M [00:00&lt;00:00, 223MB/s]
+  3%|2         | 1.14M/44.7M [00:00&lt;00:03, 11.9MB/s]
+  7%|7         | 3.14M/44.7M [00:00&lt;00:02, 17.2MB/s]
+ 15%|#4        | 6.49M/44.7M [00:00&lt;00:01, 25.2MB/s]
+ 26%|##6       | 11.8M/44.7M [00:00&lt;00:00, 37.3MB/s]
+ 46%|####6     | 20.8M/44.7M [00:00&lt;00:00, 57.6MB/s]
+ 64%|######3   | 28.5M/44.7M [00:00&lt;00:00, 64.6MB/s]
+ 95%|#########4| 42.4M/44.7M [00:00&lt;00:00, 90.9MB/s]
+100%|##########| 44.7M/44.7M [00:00&lt;00:00, 65.0MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_tensorflow.html b/docs/how_to/compile_models/from_tensorflow.html
index 889f3edf0..7a0ad7dc3 100644
--- a/docs/how_to/compile_models/from_tensorflow.html
+++ b/docs/how_to/compile_models/from_tensorflow.html
@@ -631,7 +631,7 @@ banana (score = 0.00022)
 desk (score = 0.00019)
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  2.395 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  2.449 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-tensorflow-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7f1d3d1b878694c201c614c807cdebc8/from_tensorflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_tensorflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/sg_execution_times.html b/docs/how_to/compile_models/sg_execution_times.html
index 0915d8db4..83a684a59 100644
--- a/docs/how_to/compile_models/sg_execution_times.html
+++ b/docs/how_to/compile_models/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-compile-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:24.208</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
+<p><strong>05:30.277</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 81%" />
@@ -331,43 +331,43 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></td>
-<td><p>01:07.018</p></td>
+<td><p>01:07.415</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></td>
-<td><p>01:02.395</p></td>
+<td><p>01:02.449</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></td>
-<td><p>00:57.323</p></td>
+<td><p>00:59.398</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></td>
-<td><p>00:32.183</p></td>
+<td><p>00:32.694</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></td>
-<td><p>00:24.387</p></td>
+<td><p>00:24.820</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></td>
-<td><p>00:23.782</p></td>
+<td><p>00:23.188</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></td>
-<td><p>00:21.703</p></td>
+<td><p>00:22.603</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></td>
-<td><p>00:19.742</p></td>
+<td><p>00:20.546</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></td>
-<td><p>00:13.015</p></td>
+<td><p>00:14.633</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></td>
-<td><p>00:02.659</p></td>
+<td><p>00:02.532</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/deploy_models/deploy_model_on_android.html b/docs/how_to/deploy_models/deploy_model_on_android.html
index 7772644fe..66cb76cf1 100644
--- a/docs/how_to/deploy_models/deploy_model_on_android.html
+++ b/docs/how_to/deploy_models/deploy_model_on_android.html
@@ -648,7 +648,7 @@ to the remote android device.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  16.8272      16.9435      17.2626      16.0832       0.3775
+  16.6404      16.6547      16.7156      16.5017       0.0678
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
index a0252d6af..b8dc4b22f 100644
--- a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
+++ b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
@@ -431,14 +431,17 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth&quot; to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
 
   0%|          | 0.00/170M [00:00&lt;?, ?B/s]
- 11%|#1        | 18.8M/170M [00:00&lt;00:00, 197MB/s]
- 25%|##4       | 42.3M/170M [00:00&lt;00:00, 226MB/s]
- 38%|###7      | 63.9M/170M [00:00&lt;00:00, 204MB/s]
- 53%|#####3    | 90.0M/170M [00:00&lt;00:00, 230MB/s]
- 66%|######6   | 112M/170M [00:00&lt;00:00, 210MB/s]
- 78%|#######8  | 133M/170M [00:00&lt;00:00, 189MB/s]
- 89%|########9 | 151M/170M [00:00&lt;00:00, 188MB/s]
-100%|##########| 170M/170M [00:00&lt;00:00, 206MB/s]
+  2%|1         | 2.94M/170M [00:00&lt;00:05, 30.6MB/s]
+  5%|5         | 8.88M/170M [00:00&lt;00:03, 49.2MB/s]
+ 12%|#1        | 20.1M/170M [00:00&lt;00:01, 80.6MB/s]
+ 23%|##2       | 38.7M/170M [00:00&lt;00:01, 125MB/s]
+ 34%|###4      | 58.3M/170M [00:00&lt;00:00, 154MB/s]
+ 46%|####5     | 77.7M/170M [00:00&lt;00:00, 171MB/s]
+ 57%|#####6    | 96.7M/170M [00:00&lt;00:00, 180MB/s]
+ 68%|######8   | 116M/170M [00:00&lt;00:00, 187MB/s]
+ 80%|#######9  | 135M/170M [00:00&lt;00:00, 192MB/s]
+ 91%|#########1| 155M/170M [00:01&lt;00:00, 196MB/s]
+100%|##########| 170M/170M [00:01&lt;00:00, 165MB/s]
 /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
   for i in range(dim)
 /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the &#39;trunc&#39; function NOT &#39;floor&#39;). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode=&#39;trunc&#39;), or for actual floor division, use torch.div(a, b, rounding_mode=&#39;floor&#39;).
@@ -533,7 +536,7 @@ torchvision rcnn models.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get 9 valid boxes
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  55.642 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  6.174 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-object-detection-pytorch-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7795da4b258c8feff986668b95ef57ad/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized.html b/docs/how_to/deploy_models/deploy_prequantized.html
index 72084b0de..efd605d8f 100644
--- a/docs/how_to/deploy_models/deploy_prequantized.html
+++ b/docs/how_to/deploy_models/deploy_prequantized.html
@@ -472,9 +472,11 @@ training. Other models require a full post training calibration.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/mobilenet_v2-b0353104.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
 
   0%|          | 0.00/13.6M [00:00&lt;?, ?B/s]
- 28%|##7       | 3.76M/13.6M [00:00&lt;00:00, 39.4MB/s]
- 76%|#######5  | 10.2M/13.6M [00:00&lt;00:00, 56.2MB/s]
-100%|##########| 13.6M/13.6M [00:00&lt;00:00, 56.8MB/s]
+  6%|6         | 880k/13.6M [00:00&lt;00:01, 8.92MB/s]
+ 19%|#8        | 2.51M/13.6M [00:00&lt;00:00, 13.8MB/s]
+ 40%|####      | 5.46M/13.6M [00:00&lt;00:00, 21.6MB/s]
+ 80%|########  | 10.8M/13.6M [00:00&lt;00:00, 35.3MB/s]
+100%|##########| 13.6M/13.6M [00:00&lt;00:00, 32.6MB/s]
 </pre></div>
 </div>
 </div>
@@ -563,7 +565,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  90.3291      90.2933      91.0451      90.1698       0.1479
+  90.5948      90.5228      93.5147      90.3220       0.3339
 </pre></div>
 </div>
 <div class="admonition note">
@@ -602,7 +604,7 @@ This includes support for the VNNI 8 bit dot product instruction (CascadeLake or
 <div class="section" id="deploy-a-quantized-tflite-model">
 <h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline">¶</a></h2>
 <p>TODO</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  7.132 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  10.519 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized_tflite.html b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
index adfe14b37..ad54364d8 100644
--- a/docs/how_to/deploy_models/deploy_prequantized_tflite.html
+++ b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
@@ -565,7 +565,7 @@ TFLite Top-5 labels: [387 102 386 341 349]
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  120.2484     120.0720     126.9187     119.4935      0.8613
+  121.0160     120.9833     122.6799     120.1841      0.4180
 </pre></div>
 </div>
 <div class="admonition note">
@@ -593,7 +593,7 @@ network for ARM CPU</span></a>.</p></li>
 </ul>
 </div></blockquote>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  5.377 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  0.313 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-tflite-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/56691c7a27d45da61d112276334640d3/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_quantized.html b/docs/how_to/deploy_models/deploy_quantized.html
index 386cc2438..bdaa0e0c8 100644
--- a/docs/how_to/deploy_models/deploy_quantized.html
+++ b/docs/how_to/deploy_models/deploy_quantized.html
@@ -504,7 +504,7 @@ for calibration. But the accuracy might be impacted.</p>
   DeprecationWarning,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  12.196 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  14.345 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
index 96b2c30a2..9e4dab076 100644
--- a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
+++ b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
@@ -436,23 +436,24 @@ to your device.</p>
 Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
 
   0%|          | 0/132723 [00:00&lt;?, ?KB/s]
-  4%|4         | 5767/132723 [00:00&lt;00:02, 57666.76KB/s]
- 10%|#         | 13759/132723 [00:00&lt;00:01, 70754.06KB/s]
- 16%|#6        | 21882/132723 [00:00&lt;00:01, 75535.42KB/s]
- 23%|##2       | 30005/132723 [00:00&lt;00:01, 77777.59KB/s]
- 29%|##8       | 38151/132723 [00:00&lt;00:01, 79103.05KB/s]
- 35%|###4      | 46259/132723 [00:00&lt;00:01, 79772.93KB/s]
- 41%|####      | 54350/132723 [00:00&lt;00:00, 80143.23KB/s]
- 47%|####7     | 62512/132723 [00:00&lt;00:00, 80610.73KB/s]
- 53%|#####3    | 70648/132723 [00:00&lt;00:00, 80842.38KB/s]
- 59%|#####9    | 78823/132723 [00:01&lt;00:00, 81119.78KB/s]
- 66%|######5   | 86935/132723 [00:01&lt;00:00, 81071.90KB/s]
- 72%|#######1  | 95043/132723 [00:01&lt;00:00, 80891.95KB/s]
- 78%|#######7  | 103133/132723 [00:01&lt;00:00, 80831.41KB/s]
- 84%|########3 | 111221/132723 [00:01&lt;00:00, 80844.69KB/s]
- 90%|######### | 119501/132723 [00:01&lt;00:00, 81431.11KB/s]
- 96%|#########6| 127671/132723 [00:01&lt;00:00, 81509.19KB/s]
-100%|##########| 132723/132723 [00:01&lt;00:00, 79764.98KB/s]
+  4%|3         | 5263/132723 [00:00&lt;00:02, 52624.28KB/s]
+ 10%|9         | 13054/132723 [00:00&lt;00:01, 67491.64KB/s]
+ 16%|#5        | 21085/132723 [00:00&lt;00:01, 73335.68KB/s]
+ 22%|##1       | 29149/132723 [00:00&lt;00:01, 76214.38KB/s]
+ 28%|##7       | 37121/132723 [00:00&lt;00:01, 77474.02KB/s]
+ 34%|###3      | 44869/132723 [00:00&lt;00:01, 76415.03KB/s]
+ 40%|###9      | 52829/132723 [00:00&lt;00:01, 77441.08KB/s]
+ 46%|####5     | 60802/132723 [00:00&lt;00:00, 78162.09KB/s]
+ 52%|#####1    | 68779/132723 [00:00&lt;00:00, 78660.90KB/s]
+ 58%|#####7    | 76647/132723 [00:01&lt;00:00, 78492.92KB/s]
+ 64%|######3   | 84498/132723 [00:01&lt;00:00, 78016.11KB/s]
+ 70%|######9   | 92302/132723 [00:01&lt;00:00, 77820.87KB/s]
+ 75%|#######5  | 100086/132723 [00:01&lt;00:00, 77590.94KB/s]
+ 81%|########1 | 107846/132723 [00:01&lt;00:00, 77414.04KB/s]
+ 87%|########7 | 115588/132723 [00:01&lt;00:00, 77258.94KB/s]
+ 93%|#########2| 123315/132723 [00:01&lt;00:00, 77236.76KB/s]
+ 99%|#########8| 131081/132723 [00:01&lt;00:00, 77360.57KB/s]
+100%|##########| 132723/132723 [00:01&lt;00:00, 76656.68KB/s]
 </pre></div>
 </div>
 <p>Create TVM runtime and do inference
@@ -495,7 +496,7 @@ Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from h
 <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  17.296 seconds)</p>
+<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  24.372 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/sg_execution_times.html b/docs/how_to/deploy_models/sg_execution_times.html
index 558ecd16f..c7124c59f 100644
--- a/docs/how_to/deploy_models/sg_execution_times.html
+++ b/docs/how_to/deploy_models/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-deploy-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>10:30.340</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
+<p><strong>10:48.811</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 86%" />
@@ -331,31 +331,31 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></td>
-<td><p>02:55.642</p></td>
+<td><p>03:06.174</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></td>
-<td><p>02:17.296</p></td>
+<td><p>02:24.372</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></td>
-<td><p>02:05.377</p></td>
+<td><p>02:00.313</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></td>
-<td><p>01:12.196</p></td>
+<td><p>01:14.345</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></td>
-<td><p>01:07.132</p></td>
+<td><p>01:10.519</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></td>
-<td><p>00:30.069</p></td>
+<td><p>00:29.684</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></td>
-<td><p>00:22.622</p></td>
+<td><p>00:23.399</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></td>
diff --git a/docs/how_to/extend_tvm/bring_your_own_datatypes.html b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
index 4ffd08844..48d458112 100644
--- a/docs/how_to/extend_tvm/bring_your_own_datatypes.html
+++ b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
@@ -604,7 +604,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 <span class="n">module</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <span class="n">get_mobilenet</span><span class="p">()</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipf8e938c7-37eb-403a-9fff-f2181100ed36 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipd7544d1b-4b64-40cc-8ec0-ece6d947d485 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 </pre></div>
 </div>
 <p>It’s easy to execute MobileNet with native TVM:</p>
diff --git a/docs/how_to/extend_tvm/sg_execution_times.html b/docs/how_to/extend_tvm/sg_execution_times.html
index 47b2cf7cd..c54db2c0e 100644
--- a/docs/how_to/extend_tvm/sg_execution_times.html
+++ b/docs/how_to/extend_tvm/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-extend-tvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:39.974</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
+<p><strong>00:41.837</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -331,19 +331,19 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></td>
-<td><p>00:36.858</p></td>
+<td><p>00:38.203</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></td>
-<td><p>00:02.196</p></td>
+<td><p>00:02.362</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></td>
-<td><p>00:00.914</p></td>
+<td><p>00:01.265</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></td>
-<td><p>00:00.006</p></td>
+<td><p>00:00.007</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/extend_tvm/use_pass_instrument.html b/docs/how_to/extend_tvm/use_pass_instrument.html
index ae6d82bce..a996c2104 100644
--- a/docs/how_to/extend_tvm/use_pass_instrument.html
+++ b/docs/how_to/extend_tvm/use_pass_instrument.html
@@ -507,10 +507,10 @@ profile the execution time of each passes.</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6813us [6813us] (46.23%; 46.23%)
-FoldScaleAxis: 7925us [7us] (53.77%; 53.77%)
-        FoldConstant: 7918us [1609us] (53.73%; 99.91%)
-                InferType: 6309us [6309us] (42.81%; 79.68%)
+InferType: 7082us [7082us] (45.55%; 45.55%)
+FoldScaleAxis: 8467us [8us] (54.45%; 54.45%)
+        FoldConstant: 8459us [1599us] (54.40%; 99.90%)
+                InferType: 6860us [6860us] (44.12%; 81.10%)
 </pre></div>
 </div>
 </div>
@@ -532,10 +532,10 @@ Refer to following sections and <a class="reference internal" href="../../refere
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6324us [6324us] (44.61%; 44.61%)
-FoldScaleAxis: 7853us [5us] (55.39%; 55.39%)
-        FoldConstant: 7848us [1609us] (55.36%; 99.94%)
-                InferType: 6239us [6239us] (44.01%; 79.49%)
+InferType: 7021us [7021us] (45.79%; 45.79%)
+FoldScaleAxis: 8313us [7us] (54.21%; 54.21%)
+        FoldConstant: 8306us [1646us] (54.17%; 99.92%)
+                InferType: 6660us [6660us] (43.43%; 80.18%)
 </pre></div>
 </div>
 <p>Register empty list to clear existing instruments.</p>
diff --git a/docs/how_to/optimize_operators/opt_conv_cuda.html b/docs/how_to/optimize_operators/opt_conv_cuda.html
index aaadbad95..d477112ee 100644
--- a/docs/how_to/optimize_operators/opt_conv_cuda.html
+++ b/docs/how_to/optimize_operators/opt_conv_cuda.html
@@ -556,7 +556,7 @@ latency of convolution.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Convolution: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">*</span> <span cl [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 54.147519 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 54.170101 ms
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-optimize-operators-opt-conv-cuda-py">
diff --git a/docs/how_to/optimize_operators/opt_conv_tensorcore.html b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
index 447c4e5a4..5606b9b5b 100644
--- a/docs/how_to/optimize_operators/opt_conv_tensorcore.html
+++ b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
@@ -898,7 +898,7 @@ be able to run on our build server</p>
     <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;conv2d with tensor core: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">* [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 7.228174 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 8.953879 ms
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/optimize_operators/opt_gemm.html b/docs/how_to/optimize_operators/opt_gemm.html
index fb1c7daa4..03ea94833 100644
--- a/docs/how_to/optimize_operators/opt_gemm.html
+++ b/docs/how_to/optimize_operators/opt_gemm.html
@@ -453,8 +453,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Baseline: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.018909
-Baseline: 3.540226
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.019942
+Baseline: 3.478523
 </pre></div>
 </div>
 <p>In TVM, we can always inspect lower level IR to debug or optimize our schedule.
@@ -514,7 +514,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt1: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.298191
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.326673
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -581,7 +581,7 @@ vastly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt2: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.336718
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.346013
 </pre></div>
 </div>
 <p>Here is the generated IR after vectorization.</p>
@@ -642,7 +642,7 @@ the access pattern for A matrix is more cache friendly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt3: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.119759
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.136822
 </pre></div>
 </div>
 <p>Here is the generated IR after loop permutation.</p>
@@ -725,7 +725,7 @@ flattening.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt4: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.111192
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.112451
 </pre></div>
 </div>
 <p>Here is the generated IR after array packing.</p>
@@ -811,7 +811,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt5: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.112104
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.113851
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -901,7 +901,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt6: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">opt6_time</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.145330
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.146176
 </pre></div>
 </div>
 <p>Here is the generated IR after parallelization.</p>
diff --git a/docs/how_to/optimize_operators/sg_execution_times.html b/docs/how_to/optimize_operators/sg_execution_times.html
index 55c6fc7b1..3b735788d 100644
--- a/docs/how_to/optimize_operators/sg_execution_times.html
+++ b/docs/how_to/optimize_operators/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-optimize-operators-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:35.000</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
+<p><strong>00:35.504</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,15 +331,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></td>
-<td><p>00:32.637</p></td>
+<td><p>00:33.306</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></td>
-<td><p>00:01.323</p></td>
+<td><p>00:01.213</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></td>
-<td><p>00:01.040</p></td>
+<td><p>00:00.985</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
index a98d50028..dca97fcdc 100644
--- a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
+++ b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autoscheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:09.410</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
+<p><strong>05:19.865</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 85%" />
@@ -331,27 +331,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></td>
-<td><p>02:31.305</p></td>
+<td><p>02:38.800</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></td>
-<td><p>01:20.772</p></td>
+<td><p>01:22.662</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></td>
-<td><p>00:43.249</p></td>
+<td><p>00:43.835</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></td>
-<td><p>00:16.972</p></td>
+<td><p>00:16.758</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></td>
-<td><p>00:08.567</p></td>
+<td><p>00:08.951</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></td>
-<td><p>00:08.545</p></td>
+<td><p>00:08.859</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
index 5b90acf45..5a05de383 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
@@ -486,483 +486,43 @@ cooperative fetching, unrolling and operator fusion.</p>
              compute: Buffer(compute_2: Pointer(float32), float32, [25088], [])}
   buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
   preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
-  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 28;
-  allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
-  allocate(pad_temp.shared: Pointer(shared float32), float32, [72]), storage_scope = shared;
-  allocate(kernel.shared: Pointer(shared float32), float32, [3072]), storage_scope = shared;
-  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64 {
-    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope=&quot;local&quot;, align=32)[0] = 0f32
+  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 16;
+  allocate(conv2d_nchw: Pointer(local float32), float32, [4]), storage_scope = local;
+  allocate(pad_temp.shared: Pointer(shared float32), float32, [504]), storage_scope = shared;
+  allocate(kernel.shared: Pointer(shared float32), float32, [768]), storage_scope = shared;
+  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 392 {
+    conv2d_nchw_1: Buffer(conv2d_nchw, float32, [4], [], scope=&quot;local&quot;, align=16)[0] = 0f32
     conv2d_nchw_1[1] = 0f32
     conv2d_nchw_1[2] = 0f32
     conv2d_nchw_1[3] = 0f32
-    conv2d_nchw_1[4] = 0f32
-    conv2d_nchw_1[5] = 0f32
-    conv2d_nchw_1[6] = 0f32
-    conv2d_nchw_1[7] = 0f32
-    conv2d_nchw_1[8] = 0f32
-    conv2d_nchw_1[9] = 0f32
-    conv2d_nchw_1[10] = 0f32
-    conv2d_nchw_1[11] = 0f32
-    conv2d_nchw_1[12] = 0f32
-    conv2d_nchw_1[13] = 0f32
     for (rc.outer.outer: int32, 0, 64) {
-      for (ry.outer.outer: int32, 0, 3) {
-        let cse_var_2: int32 = (rc.outer.outer*72)
-        let cse_var_1: int32 = (ry.outer.outer*3)
-         {
-          attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64 {
-            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
-              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [72], [], scope=&quot;shared&quot;)[(threadIdx.x_1*4)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 9))) &amp;&amp; (floormod((threadIdx.x_1*4), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv((threadIdx.x_1*4), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) +  [...]
-            }
-            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
-              pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 1), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 1), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0 [...]
-            }
-            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
-              pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 2), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 2), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 2), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0 [...]
-            }
-            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
-              pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 3), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 3), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 3), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0 [...]
-            }
+      for (rx.outer.outer: int32, 0, 3) {
+        attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 392;
+        pad_temp.shared_1: Buffer(pad_temp.shared, float32, [504], [], scope=&quot;shared&quot;)[threadIdx.x_1] = @tir.if_then_else(((((7 &lt;= floormod(threadIdx.x_1, 63)) &amp;&amp; (floormod(threadIdx.x_1, 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[(((((rc.outer.outer*392) + (floordiv(threadIdx.x_1, 63)*49)) + rx.outer.outer) + floormod(threadIdx.x_1, 63)) - 8)], 0f32, dtype [...]
+        attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 392;
+        if @tir.likely((threadIdx.x_1 &lt; 112), dtype=bool) {
+          pad_temp.shared_1[(threadIdx.x_1 + 392)] = @tir.if_then_else(((((1 &lt;= floormod((floordiv(threadIdx.x_1, 7) + 2), 9)) &amp;&amp; (floormod((floordiv(threadIdx.x_1, 7) + 2), 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx.outer.outer + floormod(threadIdx.x_1, 7)))) &amp;&amp; ((rx.outer.outer + floormod(threadIdx.x_1, 7)) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv((floordiv(threadIdx.x_1, 7) + 56), 9)*49)) + (floormod((floordiv(threadIdx.x_1, 7) + 2), 9)*7)) + rx.outer.outer) +  [...]
+        }
+        attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 392;
+        kernel.shared_1: Buffer(kernel.shared, float32, [768], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[(((((blockIdx.x*147456) + (floordiv(threadIdx.x_2, 24)*4608)) + (rc.outer.outer*72)) + (floormod(threadIdx.x_2, 24)*3)) + rx.outer.outer)]
+        attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 392;
+        if @tir.likely((threadIdx.x_2 &lt; 376), dtype=bool) {
+          kernel.shared_1[(threadIdx.x_2 + 392)] = kernel[((((((blockIdx.x*147456) + (floordiv((floordiv(threadIdx.x_2, 8) + 49), 3)*4608)) + (rc.outer.outer*72)) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + (floormod((threadIdx.x_2 + 2), 3)*3)) + rx.outer.outer)]
+        }
+        for (rc.outer.inner: int32, 0, 4) {
+          for (ff.outer.inner: int32, 0, 4) {
+            conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[((rc.outer.inner*126) + floormod(threadIdx.x, 49))]*kernel.shared_1[(((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6))]))
+            conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[(((rc.outer.inner*126) + floormod(threadIdx.x, 49)) + 7)]*kernel.shared_1[((((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6)) + 1)]))
+            conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[(((rc.outer.inner*126) + floormod(threadIdx.x, 49)) + 14)]*kernel.shared_1[((((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6)) + 2)]))
+            conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[(((rc.outer.inner*126) + floormod(threadIdx.x, 49)) + 63)]*kernel.shared_1[((((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6)) + 3)]))
+            conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[(((rc.outer.inner*126) + floormod(threadIdx.x, 49)) + 70)]*kernel.shared_1[((((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6)) + 4)]))
+            conv2d_nchw_1[ff.outer.inner] = (conv2d_nchw_1[ff.outer.inner] + (pad_temp.shared_1[(((rc.outer.inner*126) + floormod(threadIdx.x, 49)) + 77)]*kernel.shared_1[((((floordiv(threadIdx.x, 49)*96) + (ff.outer.inner*24)) + (rc.outer.inner*6)) + 5)]))
           }
-          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1: Buffer(kernel.shared, float32, [3072], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 64)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 8), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 128)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 16), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 32), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 192)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 36864)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 256)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 32), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 64), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 320)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 40), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 80), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 384)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 73728)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 56), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 112), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 512)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 64), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 128), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 576)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 110592)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 640)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 80), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 160), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 704)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 88), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 176), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 768)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 147456)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 832)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 104), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 208), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 112), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 224), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 960)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 184320)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 128), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 256), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 136), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 272), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1152)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 221184)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1216)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 152), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 304), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1280)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 160), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 320), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1408)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 176), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 352), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1472)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 184), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 368), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1536)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 294912)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1600)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 200), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 400), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1664)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 208), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 416), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1728)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 331776)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1792)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 224), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 448), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1856)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 232), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 464), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1920)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 368640)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 1984)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 248), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 496), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2048)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 256), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 512), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2112)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 405504)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2176)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 272), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 544), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2240)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 280), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 560), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2304)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 442368)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2368)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 296), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 592), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2432)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 304), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 608), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2496)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 479232)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2560)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 320), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 640), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2624)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 328), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 656), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2688)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 516096)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2752)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 344), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 688), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2816)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 352), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 704), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2880)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(floordiv(threadIdx.x_2, 8), 3)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 552960)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 2944)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 368), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 736), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
-          kernel.shared_1[(threadIdx.x_2 + 3008)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((floordiv(threadIdx.x_2, 8) + 376), 3)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 752), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*48)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 3)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 24)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 27)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 1)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 4)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 25)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 28)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 2)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 5)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 26)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 29)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 6)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 9)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 30)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 33)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 7)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 10)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 31)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 34)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 8)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 11)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 32)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 35)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 12)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 15)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 36)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 39)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 13)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 16)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 37)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 40)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 14)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 17)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 38)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 41)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 18)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 21)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 42)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 45)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 19)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 22)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 43)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 46)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 20)]))
-          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 23)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 47)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 44)]))
-          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 47)]))
         }
       }
     }
-    for (i1.inner: int32, 0, 2) {
-      for (i3.inner: int32, 0, 7) {
-        compute[(((((floordiv(blockIdx.x, 7)*6272) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((floordiv(blockIdx.x, 7)*128) + (threadIdx.x*2)) + i1.inner)]), 0f32)
-      }
+    for (i1.inner: int32, 0, 4) {
+      compute[((((blockIdx.x*1568) + (floordiv(threadIdx.x, 49)*196)) + (i1.inner*49)) + floormod(threadIdx.x, 49))] = max((conv2d_nchw_1[i1.inner] + bias[(((blockIdx.x*32) + (floordiv(threadIdx.x, 49)*4)) + i1.inner)]), 0f32)
     }
   }
 }
@@ -999,7 +559,7 @@ cooperative fetching, unrolling and operator fusion.</p>
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.363 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.337 ms
 </pre></div>
 </div>
 </div>
@@ -1029,35 +589,35 @@ conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o
 conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
 conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
-conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
+conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=4)
+conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=8)
 conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
 conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
 conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
+conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
 conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
 conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
-conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
+conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
+conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
 conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
 conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
 conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
-conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
+conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=3)
 conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
 conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
-conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
+conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
 s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2d_nc [...]
 compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
 compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
 compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
-compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
+compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=4)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=8)
 compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
 compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
-compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
 compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
-compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
+compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
+compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
 compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
 s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
 s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -1077,14 +637,14 @@ s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=392)
 s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
 pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=392)
 s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
-s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 512)
+s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 16)
 s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
 
 CUDA source code:
@@ -1102,430 +662,40 @@ CUDA source code:
   #define int64_t long long
   #define uint64_t unsigned long long
 #endif
-extern &quot;C&quot; __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-  float conv2d_nchw[14];
-  __shared__ float pad_temp_shared[72];
-  __shared__ float kernel_shared[3072];
+extern &quot;C&quot; __global__ void __launch_bounds__(392) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+  float conv2d_nchw[4];
+  __shared__ float pad_temp_shared[504];
+  __shared__ float kernel_shared[768];
   conv2d_nchw[0] = 0.000000e+00f;
   conv2d_nchw[1] = 0.000000e+00f;
   conv2d_nchw[2] = 0.000000e+00f;
   conv2d_nchw[3] = 0.000000e+00f;
-  conv2d_nchw[4] = 0.000000e+00f;
-  conv2d_nchw[5] = 0.000000e+00f;
-  conv2d_nchw[6] = 0.000000e+00f;
-  conv2d_nchw[7] = 0.000000e+00f;
-  conv2d_nchw[8] = 0.000000e+00f;
-  conv2d_nchw[9] = 0.000000e+00f;
-  conv2d_nchw[10] = 0.000000e+00f;
-  conv2d_nchw[11] = 0.000000e+00f;
-  conv2d_nchw[12] = 0.000000e+00f;
-  conv2d_nchw[13] = 0.000000e+00f;
   for (int rc_outer_outer = 0; rc_outer_outer &lt; 64; ++rc_outer_outer) {
-    for (int ry_outer_outer = 0; ry_outer_outer &lt; 3; ++ry_outer_outer) {
+    for (int rx_outer_outer = 0; rx_outer_outer &lt; 3; ++rx_outer_outer) {
       __syncthreads();
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) * 4) % 9))) &amp;&amp; (((((int)threadIdx.x) * 4) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
-      }
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 1) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 1) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
+      pad_temp_shared[((int)threadIdx.x)] = (((((7 &lt;= (((int)threadIdx.x) % 63)) &amp;&amp; ((((int)threadIdx.x) % 63) &lt; 56)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[(((((rc_outer_outer * 392) + ((((int)threadIdx.x) / 63) * 49)) + rx_outer_outer) + (((int)threadIdx.x) % 63)) - 8)] : 0.000000e+00f);
+      if (((int)threadIdx.x) &lt; 112) {
+        pad_temp_shared[(((int)threadIdx.x) + 392)] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + 2) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + 2) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 392) / 63) * 49)) + ((((((int)threadIdx.x) / 7) + 2) % 9) * 7)) + rx_outer_outer) + (((int)threadIdx.x) % 7)) - 8)] : 0.000000e+00f);
       }
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 2) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 2) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
+      kernel_shared[((int)threadIdx.x)] = kernel[(((((((int)blockIdx.x) * 147456) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((int)threadIdx.x) % 24) * 3)) + rx_outer_outer)];
+      if (((int)threadIdx.x) &lt; 376) {
+        kernel_shared[(((int)threadIdx.x) + 392)] = kernel[((((((((int)blockIdx.x) * 147456) + (((((int)threadIdx.x) + 392) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (((((int)threadIdx.x) + 2) % 3) * 3)) + rx_outer_outer)];
       }
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 3) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 3) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
-      }
-      kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
-      kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
-      kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
-      kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
-      kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
-      kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
-      kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
-      kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
-      kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
-      kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
-      kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
-      kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
-      kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
-      kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
-      kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
-      kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
       __syncthreads();
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      for (int rc_outer_inner = 0; rc_outer_inner &lt; 4; ++rc_outer_inner) {
+        for (int ff_outer_inner = 0; ff_outer_inner &lt; 4; ++ff_outer_inner) {
+          conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[((rc_outer_inner * 126) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6))]));
+          conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[(((rc_outer_inner * 126) + (((int)threadIdx.x) % 49)) + 7)] * kernel_shared[(((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6)) + 1)]));
+          conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[(((rc_outer_inner * 126) + (((int)threadIdx.x) % 49)) + 14)] * kernel_shared[(((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6)) + 2)]));
+          conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[(((rc_outer_inner * 126) + (((int)threadIdx.x) % 49)) + 63)] * kernel_shared[(((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6)) + 3)]));
+          conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[(((rc_outer_inner * 126) + (((int)threadIdx.x) % 49)) + 70)] * kernel_shared[(((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6)) + 4)]));
+          conv2d_nchw[ff_outer_inner] = (conv2d_nchw[ff_outer_inner] + (pad_temp_shared[(((rc_outer_inner * 126) + (((int)threadIdx.x) % 49)) + 77)] * kernel_shared[(((((((int)threadIdx.x) / 49) * 96) + (ff_outer_inner * 24)) + (rc_outer_inner * 6)) + 5)]));
+        }
+      }
     }
   }
-  for (int i1_inner = 0; i1_inner &lt; 2; ++i1_inner) {
-    for (int i3_inner = 0; i3_inner &lt; 7; ++i3_inner) {
-      compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
-    }
+  for (int i1_inner = 0; i1_inner &lt; 4; ++i1_inner) {
+    compute[((((((int)blockIdx.x) * 1568) + ((((int)threadIdx.x) / 49) * 196)) + (i1_inner * 49)) + (((int)threadIdx.x) % 49))] = max((conv2d_nchw[i1_inner] + bias[(((((int)blockIdx.x) * 32) + ((((int)threadIdx.x) / 49) * 4)) + i1_inner)]), 0.000000e+00f);
   }
 }
 </pre></div>
@@ -1562,7 +732,7 @@ In the example below we resume the status and do more 5 trials.</p>
 Get devices for measurement successfully!
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  31.305 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  38.800 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e3e540f3b477c0c52d8eb73e674e8ffd/tune_conv2d_layer_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_conv2d_layer_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
index 384866d59..f285ca5a5 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
@@ -901,7 +901,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-   9.9480       9.9365       9.9865       9.9211       0.0279
+   9.6032       9.6115       9.6151       9.5830       0.0144
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
index 67ba336c5..c7d1b8fc5 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
@@ -920,7 +920,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  758.2846     758.0884     758.6797     758.0856      0.2794
+  767.6131     768.6869     769.0931     765.0593      1.8134
 </pre></div>
 </div>
 </div>
@@ -942,7 +942,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
 with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>.</p></li>
 </ol>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  20.772 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  22.662 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-x86-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e416b94ca1090b0897c0f6e0df95b911/tune_network_x86.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_x86.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
index 5346401bb..3db0912d0 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
@@ -620,76 +620,30 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
              placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
              compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
   buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-  preflattened_buffer_map = {compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_7: placeholder_15: Buffer(placeholder_12, int32, [4916], []), placeholder_6: placeholder_16: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), placeholder_5: placeholder_18: Buffer(placeholder_10, float32, [128, 256], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], [])} {
-  for (i0.outer.i1.outer.fused: int32, 0, 32) &quot;parallel&quot; {
-    allocate(compute_4: Pointer(global float32), float32, [2048]), storage_scope = global {
-      for (nb_j.inner: int32, 0, 2) {
-        for (i.inner.init: int32, 0, 64) {
-          let cse_var_1: int32 = ((i.inner.init*32) + (nb_j.inner*16))
-           {
-            compute_5: Buffer(compute_4, float32, [2048], [])[cse_var_1] = 0f32
-            compute_5[(cse_var_1 + 1)] = 0f32
-            compute_5[(cse_var_1 + 2)] = 0f32
-            compute_5[(cse_var_1 + 3)] = 0f32
-            compute_5[(cse_var_1 + 4)] = 0f32
-            compute_5[(cse_var_1 + 5)] = 0f32
-            compute_5[(cse_var_1 + 6)] = 0f32
-            compute_5[(cse_var_1 + 7)] = 0f32
-            compute_5[(cse_var_1 + 8)] = 0f32
-            compute_5[(cse_var_1 + 9)] = 0f32
-            compute_5[(cse_var_1 + 10)] = 0f32
-            compute_5[(cse_var_1 + 11)] = 0f32
-            compute_5[(cse_var_1 + 12)] = 0f32
-            compute_5[(cse_var_1 + 13)] = 0f32
-            compute_5[(cse_var_1 + 14)] = 0f32
-            compute_5[(cse_var_1 + 15)] = 0f32
+  preflattened_buffer_map = {placeholder_5: placeholder_15: Buffer(placeholder_10, float32, [128, 256], []), placeholder_8: placeholder_16: Buffer(placeholder_13, int32, [33], []), placeholder_7: placeholder_17: Buffer(placeholder_12, int32, [4916], []), placeholder_9: placeholder_18: Buffer(placeholder_14, float32, [128, 512], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_6: placeholder_19: Buffer(placeholder_11, float32, [4916, 16, 1], [])} {
+  for (i0.outer.i1.outer.fused: int32, 0, 256) &quot;parallel&quot; {
+    allocate(compute_4: Pointer(global float32), float32, [256]), storage_scope = global {
+      for (i.outer.inner: int32, 0, 2) {
+        for (i.inner.init: int32, 0, 8) {
+          for (j.init: int32, 0, 16) {
+            compute_5: Buffer(compute_4, float32, [256], [])[(((i.outer.inner*128) + (i.inner.init*16)) + j.init)] = 0f32
           }
         }
-        for (elem_idx: int32, 0, let cse_var_2: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner) in (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])) {
-          for (i.inner: int32, 0, 64) {
-            let cse_var_21: int32 = (elem_idx*16)
-            let cse_var_20: int32 = ((i.inner*32) + (nb_j.inner*16))
-            let cse_var_19: int32 = ((floormod(i0.outer.i1.outer.fused, 16)*2) + nb_j.inner)
-            let cse_var_18: int32 = ((floordiv(i0.outer.i1.outer.fused, 16)*16384) + (i.inner*256))
-            let cse_var_17: int32 = (cse_var_20 + 9)
-            let cse_var_16: int32 = (cse_var_20 + 8)
-            let cse_var_15: int32 = (cse_var_20 + 7)
-            let cse_var_14: int32 = (cse_var_20 + 6)
-            let cse_var_13: int32 = (cse_var_20 + 5)
-            let cse_var_12: int32 = (cse_var_20 + 4)
-            let cse_var_11: int32 = (cse_var_20 + 3)
-            let cse_var_10: int32 = (cse_var_20 + 2)
-            let cse_var_9: int32 = (cse_var_20 + 15)
-            let cse_var_8: int32 = (cse_var_20 + 14)
-            let cse_var_7: int32 = (cse_var_20 + 13)
-            let cse_var_6: int32 = (cse_var_20 + 12)
-            let cse_var_5: int32 = (cse_var_20 + 11)
-            let cse_var_4: int32 = (cse_var_20 + 10)
-            let cse_var_3: int32 = (cse_var_20 + 1)
-             {
-              compute_5[cse_var_20] = (compute_5[cse_var_20] + (placeholder_1[((placeholder_3[cse_var_19]*16) + cse_var_21)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 1)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_10] = (compute_5[cse_var_10] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 2)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_11] = (compute_5[cse_var_11] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 3)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_12] = (compute_5[cse_var_12] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 4)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_13] = (compute_5[cse_var_13] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 5)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_14] = (compute_5[cse_var_14] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 6)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_15] = (compute_5[cse_var_15] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 7)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_16] = (compute_5[cse_var_16] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 8)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_17] = (compute_5[cse_var_17] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 9)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_4] = (compute_5[cse_var_4] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 10)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_5] = (compute_5[cse_var_5] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 11)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_6] = (compute_5[cse_var_6] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 12)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_7] = (compute_5[cse_var_7] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 13)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_8] = (compute_5[cse_var_8] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 14)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
-              compute_5[cse_var_9] = (compute_5[cse_var_9] + (placeholder_1[(((placeholder_3[cse_var_19]*16) + cse_var_21) + 15)]*max(placeholder[(cse_var_18 + placeholder_2[(placeholder_3[cse_var_19] + elem_idx)])], 0f32)))
+        for (elem_idx: int32, 0, let cse_var_1: int32 = floormod(i0.outer.i1.outer.fused, 32) in (placeholder_3[(cse_var_1 + 1)] - placeholder_3[cse_var_1])) {
+          if let cse_var_2: int32 = floormod(i0.outer.i1.outer.fused, 32) in @tir.likely((elem_idx &lt; (placeholder_3[(cse_var_2 + 1)] - placeholder_3[cse_var_2])), dtype=bool) {
+            for (i.inner: int32, 0, 8) {
+              for (j: int32, 0, 16) {
+                let cse_var_4: int32 = floormod(i0.outer.i1.outer.fused, 32)
+                let cse_var_3: int32 = (((i.outer.inner*128) + (i.inner*16)) + j)
+                compute_5[cse_var_3] = (compute_5[cse_var_3] + (placeholder_1[(((placeholder_3[cse_var_4]*16) + (elem_idx*16)) + j)]*max(placeholder[((((floordiv(i0.outer.i1.outer.fused, 32)*4096) + (i.outer.inner*2048)) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_4] + elem_idx)])], 0f32)))
+              }
             }
           }
         }
       }
-      for (i0.inner: int32, 0, 64) {
-        let cse_var_22: int32 = (((floordiv(i0.outer.i1.outer.fused, 16)*32768) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 16)*32))
-        compute[ramp(cse_var_22, 1, 32)] = max((compute_5[ramp((i0.inner*32), 1, 32)] + placeholder_4[ramp(cse_var_22, 1, 32)]), broadcast(0f32, 32))
+      for (i0.inner: int32, 0, 16) {
+        let cse_var_5: int32 = (((floordiv(i0.outer.i1.outer.fused, 32)*8192) + (i0.inner*512)) + (floormod(i0.outer.i1.outer.fused, 32)*16))
+        compute[ramp(cse_var_5, 1, 16)] = max((compute_5[ramp((i0.inner*16), 1, 16)] + placeholder_4[ramp(cse_var_5, 1, 16)]), broadcast(0f32, 16))
       }
     }
   }
@@ -727,7 +681,7 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.849 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.564 ms
 </pre></div>
 </div>
 <div class="admonition note">
diff --git a/docs/how_to/tune_with_autotvm/sg_execution_times.html b/docs/how_to/tune_with_autotvm/sg_execution_times.html
index 856f9222e..6d043fbc6 100644
--- a/docs/how_to/tune_with_autotvm/sg_execution_times.html
+++ b/docs/how_to/tune_with_autotvm/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autotvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:43.394</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
+<p><strong>00:44.228</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -331,11 +331,11 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></td>
-<td><p>00:43.362</p></td>
+<td><p>00:44.198</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_relay_x86.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-x86-py"><span class="std std-ref">Auto-tuning a Convolutional Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_x86.py</span></code>)</p></td>
-<td><p>00:00.019</p></td>
+<td><p>00:00.016</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a Convolutional Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></td>
@@ -343,7 +343,7 @@
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_relay_arm.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a Convolutional Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></td>
-<td><p>00:00.004</p></td>
+<td><p>00:00.005</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_mobile_gpu.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-mobile-gpu-py"><span class="std std-ref">Auto-tuning a Convolutional Network for Mobile GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_mobile_gpu.py</span></code>)</p></td>
diff --git a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
index 031a007e6..d7a2007bf 100644
--- a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
+++ b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
@@ -1164,8 +1164,8 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 4, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 1, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2885496
-No: 6   GFLOPS: 100.64/100.64   result: MeasureResult(costs=(0.0023003807083333333,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6175487041473389, timestamp=1656027173.1325228)      [(&#39;tile_f&#39;, [-1, 1, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3754080
-No: 7   GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 6   GFLOPS: 94.20/94.20     result: MeasureResult(costs=(0.002457435416666667,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.801192283630371, timestamp=1656051922.092587) [(&#39;tile_f&#39;, [-1, 1, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3754080
+No: 7   GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1288,7 +1288,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 16, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 256, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6225319
-No: 8   GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 8   GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1411,7 +1411,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 64]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,943546
-No: 9   GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 9   GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1534,7 +1534,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 16, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 16, 32]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2868708
-No: 10  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 10  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 142, in build
     res = future.result()
   File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 435, in result
@@ -1552,7 +1552,7 @@ No: 10  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
 TimeoutError
 
         [(&#39;tile_f&#39;, [-1, 32, 2, 4]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4691833
-No: 11  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 11  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1675,7 +1675,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 2, 64]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,1042124
-No: 12  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 12  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1798,7 +1798,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 1, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 32, 16]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,10013405
-No: 13  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 13  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1921,7 +1921,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 8, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 4, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6732082
-No: 14  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 14  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2044,7 +2044,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 4, 32]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7536735
-No: 15  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 15  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2167,7 +2167,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 128, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,482121
-No: 16  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 16  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2290,7 +2290,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 16]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 32, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2824525
-No: 17  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 17  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2413,7 +2413,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4559286
-No: 18  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 18  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2536,7 +2536,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 32, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 512]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9677544
-No: 19  GFLOPS: 0.00/100.64     result: Traceback (most recent call last):
+No: 19  GFLOPS: 0.00/94.20      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 738, in __call__
     yield remote, remote.load_module(os.path.split(build_result.filename)[1])
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 702, in run_through_rpc
@@ -2624,7 +2624,7 @@ tvm._ffi.base.TVMError: Traceback (most recent call last):
   15: _PyEval_EvalFrameDefault
   14: 0x0000000000537c30
   13: _PyObject_FastCallKeywords
-  12: 0x00007fc50b4d8fa2
+  12: 0x00007f0129a2dfa2
   11: _ctypes_callproc
   10: ffi_call
   9: ffi_call_unix64
@@ -2689,7 +2689,7 @@ Traceback (most recent call last):
   21: _PyFunction_FastCallKeywords
   20: _PyEval_EvalFrameDefault
   19: _PyFunction_FastCall      [(&#39;tile_f&#39;, [-1, 8, 2, 16]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6390073
-No: 20  GFLOPS: 142.17/142.17   result: MeasureResult(costs=(0.0016283737099999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4156231880187988, timestamp=1656027199.6258023)      [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
+No: 20  GFLOPS: 144.37/144.37   result: MeasureResult(costs=(0.00160355996,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.4382915496826172, timestamp=1656051948.7935243)      [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
 </pre></div>
 </div>
 <p>Finally we can inspect the best config from log file, check correctness,
@@ -2730,7 +2730,7 @@ and measure running time.</p>
 Best config:
 [(&#39;tile_f&#39;, [-1, 1, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9881539
 Finish loading 20 records
-Time cost of this operator: 0.001987
+Time cost of this operator: 0.002021
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autotvm-tune-conv2d-cuda-py">
diff --git a/docs/how_to/work_with_microtvm/micro_autotune.html b/docs/how_to/work_with_microtvm/micro_autotune.html
index ca53aa3d8..c167bc79f 100644
--- a/docs/how_to/work_with_microtvm/micro_autotune.html
+++ b/docs/how_to/work_with_microtvm/micro_autotune.html
@@ -578,10 +578,10 @@ the tuned operator.</p>
 ########## Build without Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs
 ---------                                     ---                                           --------  -------  -----              ------  -------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  314.2     98.742   (1, 2, 10, 10, 3)  2       1
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.085     0.969    (1, 6, 10, 10)     1       1
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.917     0.288    (1, 1, 10, 10, 3)  1       1
-Total_time                                    -                                             318.202   -        -                  -       -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  313.8     98.72    (1, 2, 10, 10, 3)  2       1
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.15      0.991    (1, 6, 10, 10)     1       1
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.918     0.289    (1, 1, 10, 10, 3)  1       1
+Total_time                                    -                                             317.868   -        -                  -       -
 </pre></div>
 </div>
 </div>
@@ -634,10 +634,10 @@ Total_time                                    -
 ########## Build with Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs
 ---------                                     ---                                           --------  -------  -----              ------  -------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  77.1      96.686   (1, 6, 10, 10, 1)  2       1
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.741     2.184    (1, 6, 10, 10)     1       1
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.901     1.13     (1, 1, 10, 10, 3)  1       1
-Total_time                                    -                                             79.742    -        -                  -       -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  247.5     98.827   (1, 1, 10, 10, 6)  2       1
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       2.012     0.803    (1, 6, 10, 10)     1       1
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.925     0.369    (1, 1, 10, 10, 3)  1       1
+Total_time                                    -                                             250.437   -        -                  -       -
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-autotune-py">
diff --git a/docs/how_to/work_with_microtvm/micro_train.html b/docs/how_to/work_with_microtvm/micro_train.html
index a1a372ad1..cb07b0da4 100644
--- a/docs/how_to/work_with_microtvm/micro_train.html
+++ b/docs/how_to/work_with_microtvm/micro_train.html
@@ -510,7 +510,7 @@ take about <strong>2 minutes</strong> to download the Stanford Cars, while COCO
 <a href="https://docs.python.org/3/library/shutil.html#shutil.move" title="shutil.move" class="sphx-glr-backref-module-shutil sphx-glr-backref-type-py-function"><span class="n">shutil</span><span class="o">.</span><span class="n">move</span></a><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-typ [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmpucmdjl8m/images/random&#39;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmp43fbeicb/images/random&#39;
 </pre></div>
 </div>
 </div>
@@ -570,8 +570,8 @@ objects to other stuff? We can display some examples from our datasets using <co
     <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s2">&quot;off&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmpucmdjl8m/images/target contains 8144 images
-/tmp/tmpucmdjl8m/images/random contains 5000 images
+<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmp43fbeicb/images/target contains 8144 images
+/tmp/tmp43fbeicb/images/random contains 5000 images
 </pre></div>
 </div>
 </div>
@@ -683,13 +683,13 @@ the time on our validation set).</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Epoch 1/3
-328/328 - 55s - loss: 0.2270 - accuracy: 0.9237 - val_loss: 0.1414 - val_accuracy: 0.9596
+328/328 - 56s - loss: 0.2080 - accuracy: 0.9267 - val_loss: 0.1195 - val_accuracy: 0.9603
 Epoch 2/3
-328/328 - 52s - loss: 0.0998 - accuracy: 0.9615 - val_loss: 0.1108 - val_accuracy: 0.9641
+328/328 - 53s - loss: 0.0932 - accuracy: 0.9637 - val_loss: 0.1037 - val_accuracy: 0.9653
 Epoch 3/3
-328/328 - 52s - loss: 0.0668 - accuracy: 0.9755 - val_loss: 0.1075 - val_accuracy: 0.9656
+328/328 - 53s - loss: 0.0592 - accuracy: 0.9782 - val_loss: 0.0993 - val_accuracy: 0.9660
 
-&lt;keras.callbacks.History object at 0x7f994ce79d90&gt;
+&lt;keras.callbacks.History object at 0x7f31651d4450&gt;
 </pre></div>
 </div>
 </div>
@@ -951,7 +951,7 @@ as intended.</p>
 <p>From here, we could modify the model to read live images from the camera - we have another
 Arduino tutorial for how to do that <a class="reference external" href="https://github.com/guberti/tvm-arduino-demos/tree/master/examples/person_detection">on GitHub</a>. Alternatively, we could also
 <a class="reference external" href="https://tvm.apache.org/docs/how_to/work_with_microtvm/micro_autotune.html">use TVM’s autotuning capabilities</a> to dramatically improve the model’s performance.</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 7 minutes  46.380 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 7 minutes  51.058 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-train-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/b52cec46baf4f78d6bcd94cbe269c8a6/micro_train.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_train.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/sg_execution_times.html b/docs/how_to/work_with_microtvm/sg_execution_times.html
index 2378792a8..72ed5eda1 100644
--- a/docs/how_to/work_with_microtvm/sg_execution_times.html
+++ b/docs/how_to/work_with_microtvm/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-microtvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>08:33.417</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
+<p><strong>08:40.588</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,15 +331,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_train.html#sphx-glr-how-to-work-with-microtvm-micro-train-py"><span class="std std-ref">Training Vision Models for microTVM on Arduino</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_train.py</span></code>)</p></td>
-<td><p>07:46.380</p></td>
+<td><p>07:51.058</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_autotune.html#sphx-glr-how-to-work-with-microtvm-micro-autotune-py"><span class="std std-ref">Autotuning with microTVM</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_autotune.py</span></code>)</p></td>
-<td><p>00:43.514</p></td>
+<td><p>00:45.746</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_tflite.html#sphx-glr-how-to-work-with-microtvm-micro-tflite-py"><span class="std std-ref">microTVM with TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tflite.py</span></code>)</p></td>
-<td><p>00:03.523</p></td>
+<td><p>00:03.784</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_ethosu.html#sphx-glr-how-to-work-with-microtvm-micro-ethosu-py"><span class="std std-ref">Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_ethosu.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_relay/sg_execution_times.html b/docs/how_to/work_with_relay/sg_execution_times.html
index 5d7ffdd9d..fc354da73 100644
--- a/docs/how_to/work_with_relay/sg_execution_times.html
+++ b/docs/how_to/work_with_relay/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-relay-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:10.088</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
+<p><strong>00:11.521</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,11 +331,11 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="using_external_lib.html#sphx-glr-how-to-work-with-relay-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></td>
-<td><p>00:08.287</p></td>
+<td><p>00:09.886</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="build_gcn.html#sphx-glr-how-to-work-with-relay-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></td>
-<td><p>00:01.795</p></td>
+<td><p>00:01.629</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="using_relay_viz.html#sphx-glr-how-to-work-with-relay-using-relay-viz-py"><span class="std std-ref">Use Relay Visualizer to Visualize Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_relay_viz.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/intrin_math.html b/docs/how_to/work_with_schedules/intrin_math.html
index 5716b3602..e71fccf9a 100644
--- a/docs/how_to/work_with_schedules/intrin_math.html
+++ b/docs/how_to/work_with_schedules/intrin_math.html
@@ -515,7 +515,7 @@ The following example customizes CUDA lowering rule for <code class="code docuti
 <a href="../../reference/api/python/ir.html#tvm.ir.register_intrin_lowering" title="tvm.ir.register_intrin_lowering" class="sphx-glr-backref-module-tvm-ir sphx-glr-backref-type-py-function"><span class="n">register_intrin_lowering</span></a><span class="p">(</span><span class="s2">&quot;tir.exp&quot;</span><span class="p">,</span> <span class="n">target</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">f</span><span class="o">= [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f98bb001a70&gt;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f30e3bf1c20&gt;
 </pre></div>
 </div>
 <p>Register the rule to TVM with override option to override existing rule.
diff --git a/docs/how_to/work_with_schedules/sg_execution_times.html b/docs/how_to/work_with_schedules/sg_execution_times.html
index a8618631e..08d427622 100644
--- a/docs/how_to/work_with_schedules/sg_execution_times.html
+++ b/docs/how_to/work_with_schedules/sg_execution_times.html
@@ -322,7 +322,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-schedules-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:04.038</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
+<p><strong>00:04.167</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -331,31 +331,31 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="intrin_math.html#sphx-glr-how-to-work-with-schedules-intrin-math-py"><span class="std std-ref">Intrinsics and Math Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">intrin_math.py</span></code>)</p></td>
-<td><p>00:01.886</p></td>
+<td><p>00:01.952</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tensorize.html#sphx-glr-how-to-work-with-schedules-tensorize-py"><span class="std std-ref">Use Tensorize to Leverage Hardware Intrinsics</span></a> (<code class="docutils literal notranslate"><span class="pre">tensorize.py</span></code>)</p></td>
-<td><p>00:00.945</p></td>
+<td><p>00:00.971</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="reduction.html#sphx-glr-how-to-work-with-schedules-reduction-py"><span class="std std-ref">Reduction</span></a> (<code class="docutils literal notranslate"><span class="pre">reduction.py</span></code>)</p></td>
-<td><p>00:00.524</p></td>
+<td><p>00:00.542</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="scan.html#sphx-glr-how-to-work-with-schedules-scan-py"><span class="std std-ref">Scan and Recurrent Kernel</span></a> (<code class="docutils literal notranslate"><span class="pre">scan.py</span></code>)</p></td>
-<td><p>00:00.511</p></td>
+<td><p>00:00.525</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="extern_op.html#sphx-glr-how-to-work-with-schedules-extern-op-py"><span class="std std-ref">External Tensor Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">extern_op.py</span></code>)</p></td>
-<td><p>00:00.098</p></td>
+<td><p>00:00.102</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="schedule_primitives.html#sphx-glr-how-to-work-with-schedules-schedule-primitives-py"><span class="std std-ref">Schedule Primitives in TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">schedule_primitives.py</span></code>)</p></td>
-<td><p>00:00.034</p></td>
+<td><p>00:00.036</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tedd.html#sphx-glr-how-to-work-with-schedules-tedd-py"><span class="std std-ref">Use Tensor Expression Debug Display (TEDD) for Visualization</span></a> (<code class="docutils literal notranslate"><span class="pre">tedd.py</span></code>)</p></td>
-<td><p>00:00.028</p></td>
+<td><p>00:00.027</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tuple_inputs.html#sphx-glr-how-to-work-with-schedules-tuple-inputs-py"><span class="std std-ref">Compute and Reduce with Tuple Inputs</span></a> (<code class="docutils literal notranslate"><span class="pre">tuple_inputs.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/tensorize.html b/docs/how_to/work_with_schedules/tensorize.html
index 7f49b04cc..23eb11f1c 100644
--- a/docs/how_to/work_with_schedules/tensorize.html
+++ b/docs/how_to/work_with_schedules/tensorize.html
@@ -571,7 +571,7 @@ The importing needs to happen before the tensorized GEMV being executed.</p>
              C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
   buffer_map = {A_1: A, B_1: B, C_1: C}
   preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmpde4lmcy4/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpde4lmcy4/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
+  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmpweizmmus/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpweizmmus/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
   for (i, 0, 1024) {
     for (j.outer: int32, 0, 32) {
       @tir.call_extern(&quot;gemv_update&quot;, @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/reference/api/doxygen/apply__history__best_8h.html b/docs/reference/api/doxygen/apply__history__best_8h.html
index 8beacbdc7..deb9cf750 100644
--- a/docs/reference/api/doxygen/apply__history__best_8h.html
+++ b/docs/reference/api/doxygen/apply__history__best_8h.html
@@ -82,7 +82,7 @@ $(function() {
 </div><div class="textblock"><div class="dynheader">
 Include dependency graph for apply_history_best.h:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="apply__history__best_8h__incl.svg" width="5008" height="1246"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="apply__history__best_8h__incl.svg" width="4976" height="1246"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div>
diff --git a/docs/reference/api/doxygen/apply__history__best_8h__incl.svg b/docs/reference/api/doxygen/apply__history__best_8h__incl.svg
index 905155b47..3ff9eb180 100644
--- a/docs/reference/api/doxygen/apply__history__best_8h__incl.svg
+++ b/docs/reference/api/doxygen/apply__history__best_8h__incl.svg
@@ -4,1724 +4,1730 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: include/tvm/meta_schedule/apply_history_best.h Pages: 1 -->
-<svg width="3756pt" height="934pt"
- viewBox="0.00 0.00 3755.50 934.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="3732pt" height="934pt"
+ viewBox="0.00 0.00 3732.02 934.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 930)">
 <title>include/tvm/meta_schedule/apply_history_best.h</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-930 3751.5,-930 3751.5,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-930 3728.0178,-930 3728.0178,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="1310.5,-895.5 1310.5,-925.5 1462.5,-925.5 1462.5,-895.5 1310.5,-895.5"/>
-<text text-anchor="start" x="1318.5" y="-913.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1386.5" y="-902.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/apply_history_best.h</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="1269.0178,-895.5 1269.0178,-925.5 1421.0178,-925.5 1421.0178,-895.5 1269.0178,-895.5"/>
+<text text-anchor="start" x="1277.0178" y="-913.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1345.0178" y="-902.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/apply_history_best.h</text>
 </g>
 <!-- Node1 -->
 <g id="node2" class="node">
 <title>Node1</title>
 <g id="a_node2"><a xlink:href="ir_2module_8h.html" target="_top" xlink:title="IRModule that holds the functions and type definitions. ">
-<polygon fill="#ffffff" stroke="#000000" points="1143,-727.5 1143,-746.5 1238,-746.5 1238,-727.5 1143,-727.5"/>
-<text text-anchor="middle" x="1190.5" y="-734.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/module.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="960.5178,-716.5 960.5178,-735.5 1055.5178,-735.5 1055.5178,-716.5 960.5178,-716.5"/>
+<text text-anchor="middle" x="1008.0178" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/module.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node1 -->
 <g id="edge1" class="edge">
 <title>Node0&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M1351.8361,-895.3535C1315.0496,-877.7743 1257.2627,-845.6522 1220.5,-803 1208.8441,-789.4768 1200.8947,-770.6773 1196.1228,-756.6001"/>
-<polygon fill="#191970" stroke="#191970" points="1199.4189,-755.4138 1193.0843,-746.9213 1192.7403,-757.5105 1199.4189,-755.4138"/>
+<path fill="none" stroke="#191970" d="M1317.4397,-895.4016C1253.7431,-860.5291 1097.0685,-774.7533 1034.4769,-740.4858"/>
+<polygon fill="#191970" stroke="#191970" points="1036.0589,-737.3617 1025.6067,-735.6295 1032.6974,-743.5018 1036.0589,-737.3617"/>
 </g>
 <!-- Node6 -->
 <g id="node7" class="node">
 <title>Node6</title>
 <g id="a_node7"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="2764,-436.5 2764,-455.5 2885,-455.5 2885,-436.5 2764,-436.5"/>
-<text text-anchor="middle" x="2824.5" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/reflection.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2689.5178,-436.5 2689.5178,-455.5 2810.5178,-455.5 2810.5178,-436.5 2689.5178,-436.5"/>
+<text text-anchor="middle" x="2750.0178" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/reflection.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node6 -->
-<g id="edge183" class="edge">
+<g id="edge184" class="edge">
 <title>Node0&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M1462.7806,-909.3551C1666.014,-905.9107 2237.3844,-893.6537 2711.5,-859 2943.7697,-842.0231 3002.3918,-838.884 3232.5,-803 3354.8806,-783.9155 3423.8389,-840.1274 3505.5,-747 3685.7933,-541.3912 3103.0457,-470.0502 2895.2592,-451.4757"/>
-<polygon fill="#191970" stroke="#191970" points="2895.3298,-447.9685 2885.0618,-450.5797 2894.717,-454.9417 2895.3298,-447.9685"/>
+<path fill="none" stroke="#191970" d="M1421.0261,-908.4033C1815.36,-897.282 3610.0178,-843.587 3610.0178,-787.5 3610.0178,-787.5 3610.0178,-787.5 3610.0178,-670 3610.0178,-507.3691 3026.8066,-460.6852 2820.9333,-449.2721"/>
+<polygon fill="#191970" stroke="#191970" points="2821.009,-445.7711 2810.834,-448.7238 2820.6295,-452.7608 2821.009,-445.7711"/>
 </g>
 <!-- Node21 -->
 <g id="node9" class="node">
 <title>Node21</title>
 <g id="a_node9"><a xlink:href="array_8h.html" target="_top" xlink:title="Runtime Array container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="1055.5,-302.5 1055.5,-332.5 1181.5,-332.5 1181.5,-302.5 1055.5,-302.5"/>
-<text text-anchor="start" x="1063.5" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="1118.5" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/array.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="788.0178,-302.5 788.0178,-332.5 914.0178,-332.5 914.0178,-302.5 788.0178,-302.5"/>
+<text text-anchor="start" x="796.0178" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="851.0178" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/array.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node21 -->
-<g id="edge184" class="edge">
+<g id="edge185" class="edge">
 <title>Node0&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1310.3547,-903.1177C1093.6794,-881.3508 485.6245,-814.6998 428.5,-747 422.7677,-740.2064 424.5463,-734.9612 428.5,-727 432.1597,-719.6308 893.0574,-372.5079 900.5,-369 907.2686,-365.8098 984.7157,-347.8362 1045.3972,-334.0108"/>
-<polygon fill="#191970" stroke="#191970" points="1046.3866,-337.3753 1055.3607,-331.7437 1044.8334,-330.5497 1046.3866,-337.3753"/>
+<path fill="none" stroke="#191970" d="M1268.8065,-909.144C1034.7416,-903.9788 342.0178,-880.8102 342.0178,-787.5 342.0178,-787.5 342.0178,-787.5 342.0178,-726 342.0178,-605.0619 695.9397,-401.774 813.5386,-337.5649"/>
+<polygon fill="#191970" stroke="#191970" points="815.4479,-340.5107 822.5611,-332.6587 812.1039,-334.3611 815.4479,-340.5107"/>
 </g>
 <!-- Node10 -->
 <g id="node18" class="node">
 <title>Node10</title>
 <g id="a_node18"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
-<polygon fill="#ffffff" stroke="#000000" points="2804,-67.5 2804,-86.5 2923,-86.5 2923,-67.5 2804,-67.5"/>
-<text text-anchor="middle" x="2863.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/object.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2802.5178,-67.5 2802.5178,-86.5 2921.5178,-86.5 2921.5178,-67.5 2802.5178,-67.5"/>
+<text text-anchor="middle" x="2862.0178" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/object.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node10 -->
-<g id="edge187" class="edge">
+<g id="edge188" class="edge">
 <title>Node0&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M1462.7126,-908.7448C1867.5303,-899.1779 3747.5,-851.5947 3747.5,-793 3747.5,-793 3747.5,-793 3747.5,-446 3747.5,-145.8838 3448.7933,-181.8316 3154.5,-123 3079.2321,-107.9533 2992.2109,-94.7403 2933.0809,-86.3817"/>
-<polygon fill="#191970" stroke="#191970" points="2933.4961,-82.9057 2923.1064,-84.9803 2932.5222,-89.8376 2933.4961,-82.9057"/>
+<path fill="none" stroke="#191970" d="M1421.2905,-909.6734C1654.7388,-906.8053 2380.1155,-895.379 2980.0178,-859 3145.8083,-848.9462 3724.0178,-953.5951 3724.0178,-787.5 3724.0178,-787.5 3724.0178,-787.5 3724.0178,-558 3724.0178,-345.3599 3627.9104,-261.7156 3432.0178,-179 3261.83,-107.1382 3042.2669,-85.887 2931.9072,-79.6157"/>
+<polygon fill="#191970" stroke="#191970" points="2932.0709,-76.1195 2921.8956,-79.0711 2931.6906,-83.1092 2932.0709,-76.1195"/>
 </g>
 <!-- Node31 -->
 <g id="node27" class="node">
 <title>Node31</title>
 <g id="a_node27"><a xlink:href="optional_8h.html" target="_top" xlink:title="Runtime Optional container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="1093.5,-235.5 1093.5,-265.5 1219.5,-265.5 1219.5,-235.5 1093.5,-235.5"/>
-<text text-anchor="start" x="1101.5" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="1156.5" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/optional.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1130.0178,-235.5 1130.0178,-265.5 1256.0178,-265.5 1256.0178,-235.5 1130.0178,-235.5"/>
+<text text-anchor="start" x="1138.0178" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="1193.0178" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/optional.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node31 -->
-<g id="edge185" class="edge">
+<g id="edge186" class="edge">
 <title>Node0&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1310.3281,-907.4737C1036.12,-896.0843 115.5,-853.3515 115.5,-793 115.5,-793 115.5,-793 115.5,-737 115.5,-442.5372 466.5575,-613.2768 715.5,-456 806.5227,-398.4937 805.2951,-348.2958 902.5,-302 959.6254,-274.7929 1030.6351,-261.9363 1083.0378,-255.8741"/>
-<polygon fill="#191970" stroke="#191970" points="1083.7031,-259.3221 1093.2606,-254.75 1082.938,-252.364 1083.7031,-259.3221"/>
+<path fill="none" stroke="#191970" d="M1268.7147,-909.8162C1022.6586,-906.6124 266.0178,-888.5454 266.0178,-787.5 266.0178,-787.5 266.0178,-787.5 266.0178,-670 266.0178,-599.8509 713.6927,-327.5643 779.0178,-302 839.4173,-278.3633 1017.9368,-262.5928 1119.7389,-255.2509"/>
+<polygon fill="#191970" stroke="#191970" points="1120.222,-258.7255 1129.9483,-254.5243 1119.725,-251.7431 1120.222,-258.7255"/>
 </g>
 <!-- Node33 -->
 <g id="node28" class="node">
 <title>Node33</title>
 <g id="a_node28"><a xlink:href="string_8h.html" target="_top" xlink:title="Runtime String container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="1541.5,-235.5 1541.5,-265.5 1667.5,-265.5 1667.5,-235.5 1541.5,-235.5"/>
-<text text-anchor="start" x="1549.5" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="1604.5" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/string.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1654.0178,-235.5 1654.0178,-265.5 1780.0178,-265.5 1780.0178,-235.5 1654.0178,-235.5"/>
+<text text-anchor="start" x="1662.0178" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="1717.0178" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/string.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node33 -->
-<g id="edge186" class="edge">
+<g id="edge187" class="edge">
 <title>Node0&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M1382.4706,-895.416C1376.8933,-873.1752 1367.5,-830.1957 1367.5,-793 1367.5,-793 1367.5,-793 1367.5,-737 1367.5,-570.6756 1341.5906,-506.2765 1435.5,-369 1458.0693,-336.0083 1463.46,-325.9012 1495.5,-302 1513.0885,-288.8793 1534.5527,-277.8677 1553.768,-269.4383"/>
-<polygon fill="#191970" stroke="#191970" points="1555.1643,-272.6479 1562.9935,-265.5098 1552.4217,-266.2076 1555.1643,-272.6479"/>
+<path fill="none" stroke="#191970" d="M1345.5325,-895.3852C1347.1491,-846.5652 1352.0178,-688.6833 1352.0178,-558 1352.0178,-558 1352.0178,-558 1352.0178,-384.5 1352.0178,-329.5787 1394.7746,-326.3198 1444.0178,-302 1478.3131,-285.0626 1574.9053,-269.3441 1643.4363,-259.8357"/>
+<polygon fill="#191970" stroke="#191970" points="1644.3242,-263.2468 1653.7562,-258.4211 1643.3736,-256.3116 1644.3242,-263.2468"/>
 </g>
 <!-- Node41 -->
 <g id="node33" class="node">
 <title>Node41</title>
 <g id="a_node33"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
-<polygon fill="#ffffff" stroke="#000000" points="2205.5,-369.5 2205.5,-399.5 2321.5,-399.5 2321.5,-369.5 2205.5,-369.5"/>
-<text text-anchor="start" x="2213.5" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/packed</text>
-<text text-anchor="middle" x="2263.5" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_func.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2044.0178,-369.5 2044.0178,-399.5 2160.0178,-399.5 2160.0178,-369.5 2044.0178,-369.5"/>
+<text text-anchor="start" x="2052.0178" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/packed</text>
+<text text-anchor="middle" x="2102.0178" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_func.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node41 -->
-<g id="edge188" class="edge">
+<g id="edge189" class="edge">
 <title>Node0&#45;&gt;Node41</title>
-<path fill="none" stroke="#191970" d="M1462.6515,-907.6926C1742.451,-896.8827 2697.5,-855.4136 2697.5,-793 2697.5,-793 2697.5,-793 2697.5,-502 2697.5,-464.5593 2453.7399,-417.2427 2331.5865,-395.8819"/>
-<polygon fill="#191970" stroke="#191970" points="2332.0325,-392.407 2321.5808,-394.1427 2330.8337,-399.3036 2332.0325,-392.407"/>
+<path fill="none" stroke="#191970" d="M1355.3955,-895.1292C1374.0554,-868.1842 1415.4106,-811.5338 1459.0178,-772 1657.6475,-591.9246 1720.903,-557.2642 1960.0178,-436 1985.9414,-422.8532 2016.0363,-411.4364 2041.8308,-402.7161"/>
+<polygon fill="#191970" stroke="#191970" points="2043.1018,-405.982 2051.4886,-399.508 2040.8951,-399.3389 2043.1018,-405.982"/>
 </g>
 <!-- Node58 -->
 <g id="node41" class="node">
 <title>Node58</title>
 <g id="a_node41"><a xlink:href="database_8h.html" target="_top" xlink:title="tvm/meta_schedule/database.h">
-<polygon fill="#ffffff" stroke="#000000" points="1897.5,-839.5 1897.5,-858.5 2067.5,-858.5 2067.5,-839.5 1897.5,-839.5"/>
-<text text-anchor="middle" x="1982.5" y="-846.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/meta_schedule/database.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1450.0178,-839.5 1450.0178,-858.5 1620.0178,-858.5 1620.0178,-839.5 1450.0178,-839.5"/>
+<text text-anchor="middle" x="1535.0178" y="-846.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/meta_schedule/database.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node58 -->
 <g id="edge154" class="edge">
 <title>Node0&#45;&gt;Node58</title>
-<path fill="none" stroke="#191970" d="M1462.5892,-902.6485C1570.8391,-891.4784 1769.2171,-871.0082 1887.1455,-858.8394"/>
-<polygon fill="#191970" stroke="#191970" points="1887.6597,-862.3051 1897.2476,-857.797 1886.9411,-855.342 1887.6597,-862.3051"/>
+<path fill="none" stroke="#191970" d="M1391.497,-895.4554C1423.4771,-885.104 1465.4848,-871.5067 1495.7807,-861.7004"/>
+<polygon fill="#191970" stroke="#191970" points="1497.1384,-864.9398 1505.5745,-858.5303 1494.9827,-858.28 1497.1384,-864.9398"/>
 </g>
 <!-- Node65 -->
 <g id="node43" class="node">
 <title>Node65</title>
 <g id="a_node43"><a xlink:href="target_8h.html" target="_top" xlink:title="Compilation target object. ">
-<polygon fill="#ffffff" stroke="#000000" points="1229.5,-783.5 1229.5,-802.5 1339.5,-802.5 1339.5,-783.5 1229.5,-783.5"/>
-<text text-anchor="middle" x="1284.5" y="-790.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/target/target.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="431.0178,-778 431.0178,-797 541.0178,-797 541.0178,-778 431.0178,-778"/>
+<text text-anchor="middle" x="486.0178" y="-785" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/target/target.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node65 -->
-<g id="edge189" class="edge">
+<g id="edge190" class="edge">
 <title>Node0&#45;&gt;Node65</title>
-<path fill="none" stroke="#191970" d="M1373.1653,-895.1389C1354.1199,-873.1994 1319.241,-833.0203 1299.3793,-810.1404"/>
-<polygon fill="#191970" stroke="#191970" points="1301.997,-807.8167 1292.7984,-802.5595 1296.7108,-812.4055 1301.997,-807.8167"/>
+<path fill="none" stroke="#191970" d="M1268.7905,-907.7463C1088.6836,-900.8971 647.1711,-881.977 584.0178,-859 551.7512,-847.2605 520.8365,-821.4754 502.638,-804.2955"/>
+<polygon fill="#191970" stroke="#191970" points="504.826,-801.5416 495.2122,-797.0889 499.9509,-806.5649 504.826,-801.5416"/>
 </g>
 <!-- Node76 -->
 <g id="node47" class="node">
 <title>Node76</title>
 <g id="a_node47"><a xlink:href="tensor_8h.html" target="_top" xlink:title="Dataflow tensor object. ">
-<polygon fill="#ffffff" stroke="#000000" points="1695,-783.5 1695,-802.5 1786,-802.5 1786,-783.5 1695,-783.5"/>
-<text text-anchor="middle" x="1740.5" y="-790.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/te/tensor.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1858.5178,-778 1858.5178,-797 1949.5178,-797 1949.5178,-778 1858.5178,-778"/>
+<text text-anchor="middle" x="1904.0178" y="-785" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/te/tensor.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node76 -->
-<g id="edge190" class="edge">
+<g id="edge191" class="edge">
 <title>Node0&#45;&gt;Node76</title>
-<path fill="none" stroke="#191970" d="M1431.6939,-895.4992C1501.902,-872.1957 1635.9398,-827.7057 1701.8943,-805.814"/>
-<polygon fill="#191970" stroke="#191970" points="1703.1072,-809.0993 1711.4955,-802.6272 1700.902,-802.4557 1703.1072,-809.0993"/>
+<path fill="none" stroke="#191970" d="M1421.2676,-898.2685C1478.6245,-888.6817 1559.0396,-874.4193 1629.0178,-859 1711.6135,-840.8005 1806.9231,-814.9082 1860.8698,-799.7903"/>
+<polygon fill="#191970" stroke="#191970" points="1862.091,-803.0827 1870.7703,-797.0065 1860.1962,-796.3441 1862.091,-803.0827"/>
 </g>
 <!-- Node2 -->
 <g id="node3" class="node">
 <title>Node2</title>
 <g id="a_node3"><a xlink:href="ir_2adt_8h.html" target="_top" xlink:title="Algebraic data type definitions. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1809.5,-671.5 1809.5,-690.5 1883.5,-690.5 1883.5,-671.5 1809.5,-671.5"/>
-<text text-anchor="middle" x="1846.5" y="-678.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/adt.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1745.0178,-660.5 1745.0178,-679.5 1819.0178,-679.5 1819.0178,-660.5 1745.0178,-660.5"/>
+<text text-anchor="middle" x="1782.0178" y="-667.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/adt.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node2 -->
 <g id="edge2" class="edge">
 <title>Node1&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M1238.1614,-733.5759C1335.4685,-726.4844 1563.9728,-709.3165 1755.5,-691 1769.7592,-689.6363 1785.2569,-687.9934 1799.3539,-686.4369"/>
-<polygon fill="#191970" stroke="#191970" points="1799.783,-689.9109 1809.3334,-685.324 1799.0071,-682.954 1799.783,-689.9109"/>
+<path fill="none" stroke="#191970" d="M1055.56,-722.5603C1194.0387,-712.5411 1596.0141,-683.4576 1734.4832,-673.4392"/>
+<polygon fill="#191970" stroke="#191970" points="1735.0383,-676.9083 1744.7596,-672.6957 1734.5331,-669.9265 1735.0383,-676.9083"/>
 </g>
 <!-- Node3 -->
 <g id="node4" class="node">
 <title>Node3</title>
 <g id="a_node4"><a xlink:href="ir_2expr_8h.html" target="_top" xlink:title="Base expr nodes in TVM. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2249,-615.5 2249,-634.5 2328,-634.5 2328,-615.5 2249,-615.5"/>
-<text text-anchor="middle" x="2288.5" y="-622.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/expr.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2140.5178,-604.5 2140.5178,-623.5 2219.5178,-623.5 2219.5178,-604.5 2140.5178,-604.5"/>
+<text text-anchor="middle" x="2180.0178" y="-611.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node3 -->
 <g id="edge132" class="edge">
 <title>Node1&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1238.2978,-736.0158C1356.255,-733.1558 1670.5054,-722.9107 1930.5,-691 1945.7445,-689.1289 2144.3094,-652.0346 2238.9617,-634.2959"/>
-<polygon fill="#191970" stroke="#191970" points="2239.6966,-637.7191 2248.8805,-632.4365 2238.4068,-630.839 2239.6966,-637.7191"/>
+<path fill="none" stroke="#191970" d="M1055.6378,-725.3964C1250.0531,-722.7356 1977.9686,-710.7889 2076.0178,-680 2093.3557,-674.5556 2133.5127,-647.2492 2158.5471,-629.5093"/>
+<polygon fill="#191970" stroke="#191970" points="2160.6021,-632.3426 2166.7091,-623.6849 2156.5361,-626.6446 2160.6021,-632.3426"/>
 </g>
 <!-- Node49 -->
 <g id="node5" class="node">
 <title>Node49</title>
 <g id="a_node5"><a xlink:href="ir_2type_8h.html" target="_top" xlink:title="IR/AST nodes for the unified type system in TVM. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2180.5,-554 2180.5,-573 2260.5,-573 2260.5,-554 2180.5,-554"/>
-<text text-anchor="middle" x="2220.5" y="-561" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/type.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2064.0178,-548.5 2064.0178,-567.5 2144.0178,-567.5 2144.0178,-548.5 2064.0178,-548.5"/>
+<text text-anchor="middle" x="2104.0178" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/type.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node49 -->
 <g id="edge140" class="edge">
 <title>Node1&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M1238.3967,-736.4507C1375.7591,-734.4669 1769.074,-725.8186 1892.5,-691 1910.8684,-685.8183 1912.9551,-678.5114 1930.5,-671 1976.6256,-651.2525 1990.3414,-652.1347 2037.5,-635 2090.3938,-615.7814 2151.1202,-591.6367 2187.6131,-576.8981"/>
-<polygon fill="#191970" stroke="#191970" points="2189.2807,-579.999 2197.2367,-573.0024 2186.6541,-573.5105 2189.2807,-579.999"/>
+<path fill="none" stroke="#191970" d="M1055.7447,-725.7064C1239.2015,-724.3104 1893.0123,-716.7773 1978.0178,-680 2029.4877,-657.7317 2072.1164,-604.3568 2091.9841,-576.161"/>
+<polygon fill="#191970" stroke="#191970" points="2095.0556,-577.8709 2097.8291,-567.6456 2089.2843,-573.9095 2095.0556,-577.8709"/>
 </g>
 <!-- Node1&#45;&gt;Node21 -->
 <g id="edge146" class="edge">
 <title>Node1&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1142.8914,-731.6428C1048.2489,-719.6644 845.5,-686.7922 845.5,-625 845.5,-625 845.5,-625 845.5,-502 845.5,-405.2527 964.6512,-355.8113 1045.5204,-333.3965"/>
-<polygon fill="#191970" stroke="#191970" points="1046.4357,-336.7748 1055.1821,-330.7956 1044.6161,-330.0154 1046.4357,-336.7748"/>
+<path fill="none" stroke="#191970" d="M995.7982,-716.335C974.7801,-698.5263 934.0178,-658.2801 934.0178,-614 934.0178,-614 934.0178,-614 934.0178,-446 934.0178,-403.3431 901.3453,-363.6297 876.9985,-339.9579"/>
+<polygon fill="#191970" stroke="#191970" points="879.1221,-337.1505 869.4382,-332.8525 874.3281,-342.2514 879.1221,-337.1505"/>
 </g>
 <!-- Node18 -->
 <g id="node12" class="node">
 <title>Node18</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="412,-6 412,-25 457,-25 457,-6 412,-6"/>
-<text text-anchor="middle" x="434.5" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">utility</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="376.5178,-6 376.5178,-25 421.5178,-25 421.5178,-6 376.5178,-6"/>
+<text text-anchor="middle" x="399.0178" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">utility</text>
 </g>
 <!-- Node1&#45;&gt;Node18 -->
 <g id="edge152" class="edge">
 <title>Node1&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1142.9049,-736.005C1007.3409,-732.8277 621.0851,-721.2977 498.5,-691 267.6817,-633.9518 115.5,-555.2638 115.5,-317.5 115.5,-317.5 115.5,-317.5 115.5,-133 115.5,-98.2309 128.6855,-86.458 157.5,-67 197.0516,-40.2915 337.2517,-24.3772 401.6804,-18.3198"/>
-<polygon fill="#191970" stroke="#191970" points="402.0676,-21.799 411.7058,-17.3996 401.4277,-14.8283 402.0676,-21.799"/>
+<path fill="none" stroke="#191970" d="M960.298,-722.0526C897.2782,-716.1797 783.158,-703.2545 688.0178,-680 652.1368,-671.2299 87.7811,-482.469 62.0178,-456 38.6378,-431.9796 38.0178,-418.0202 38.0178,-384.5 38.0178,-384.5 38.0178,-384.5 38.0178,-133 38.0178,-101.7875 37.8825,-86.7917 62.0178,-67 108.1468,-29.1728 290.5912,-18.9857 366.066,-16.3641"/>
+<polygon fill="#191970" stroke="#191970" points="366.515,-19.8515 376.3965,-16.0298 366.2885,-12.8551 366.515,-19.8515"/>
 </g>
 <!-- Node20 -->
 <g id="node13" class="node">
 <title>Node20</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="815,-179.5 815,-198.5 862,-198.5 862,-179.5 815,-179.5"/>
-<text text-anchor="middle" x="838.5" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">vector</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="774.5178,-179.5 774.5178,-198.5 821.5178,-198.5 821.5178,-179.5 774.5178,-179.5"/>
+<text text-anchor="middle" x="798.0178" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">vector</text>
 </g>
 <!-- Node1&#45;&gt;Node20 -->
 <g id="edge153" class="edge">
 <title>Node1&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1142.7041,-735.4038C1061.913,-731.8981 902.6277,-721.2609 856.5,-691 697.6807,-586.8106 639.8121,-487.7522 679.5,-302 686.3171,-270.0936 685.5425,-257.1479 709.5,-235 735.9346,-210.5621 776.2237,-198.9405 804.7328,-193.5234"/>
-<polygon fill="#191970" stroke="#191970" points="805.404,-196.9593 814.6556,-191.7961 804.2035,-190.063 805.404,-196.9593"/>
+<path fill="none" stroke="#191970" d="M966.7713,-716.4807C901.2562,-699.0669 774.3606,-655.5344 711.0178,-568 624.0649,-447.8384 595.5748,-350.1868 689.0178,-235 707.5867,-212.1102 739.5873,-200.4282 764.1268,-194.5813"/>
+<polygon fill="#191970" stroke="#191970" points="765.0928,-197.9534 774.1261,-192.4171 763.612,-191.1118 765.0928,-197.9534"/>
 </g>
 <!-- Node16 -->
 <g id="node20" class="node">
 <title>Node16</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1137.5,-6 1137.5,-25 1181.5,-25 1181.5,-6 1137.5,-6"/>
-<text text-anchor="middle" x="1159.5" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">string</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1166.0178,-6 1166.0178,-25 1210.0178,-25 1210.0178,-6 1166.0178,-6"/>
+<text text-anchor="middle" x="1188.0178" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">string</text>
 </g>
 <!-- Node1&#45;&gt;Node16 -->
 <g id="edge149" class="edge">
 <title>Node1&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1142.8035,-735.4702C1002.9738,-730.7244 600.4904,-715.1063 546.5,-691 367.5521,-611.1011 229.5,-580.4749 229.5,-384.5 229.5,-384.5 229.5,-384.5 229.5,-317.5 229.5,-161.147 333.2989,-129.7672 476.5,-67 536.4901,-40.7054 999.138,-21.5244 1126.9061,-16.6895"/>
-<polygon fill="#191970" stroke="#191970" points="1127.3443,-20.1757 1137.2061,-16.3035 1127.082,-13.1806 1127.3443,-20.1757"/>
+<path fill="none" stroke="#191970" d="M960.4524,-723.0662C903.438,-718.4609 805.6214,-707.0724 726.0178,-680 722.9114,-678.9436 624.9003,-625.5673 622.0178,-624 481.7893,-547.7537 285.7805,-592.7266 316.0178,-436 352.5819,-246.4801 362.4952,-129.7711 545.0178,-67 659.9212,-27.4837 1042.0365,-17.9561 1155.7955,-15.9702"/>
+<polygon fill="#191970" stroke="#191970" points="1156.0085,-19.4672 1165.9487,-15.8008 1155.8917,-12.4682 1156.0085,-19.4672"/>
 </g>
 <!-- Node1&#45;&gt;Node33 -->
 <g id="edge148" class="edge">
 <title>Node1&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M1190.0702,-727.4139C1189.2406,-708.0603 1187.5,-662.9303 1187.5,-625 1187.5,-625 1187.5,-625 1187.5,-563.5 1187.5,-435.2516 1207.68,-379.9766 1309.5,-302 1309.6393,-301.8933 1444.0604,-278.4554 1531.3785,-263.2392"/>
-<polygon fill="#191970" stroke="#191970" points="1532.0032,-266.6832 1541.2539,-261.5184 1530.8014,-259.7871 1532.0032,-266.6832"/>
+<path fill="none" stroke="#191970" d="M1055.573,-718.9571C1083.6754,-712.7283 1118.4502,-701.1793 1143.0178,-680 1286.1956,-556.569 1183.5946,-411.0349 1338.0178,-302 1385.841,-268.233 1547.5851,-256.5496 1643.6319,-252.5479"/>
+<polygon fill="#191970" stroke="#191970" points="1644.0079,-256.0358 1653.8606,-252.1404 1643.7292,-249.0414 1644.0079,-256.0358"/>
 </g>
 <!-- Node36 -->
 <g id="node31" class="node">
 <title>Node36</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1916,-179.5 1916,-198.5 2009,-198.5 2009,-179.5 1916,-179.5"/>
-<text text-anchor="middle" x="1962.5" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_map</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1876.5178,-179.5 1876.5178,-198.5 1969.5178,-198.5 1969.5178,-179.5 1876.5178,-179.5"/>
+<text text-anchor="middle" x="1923.0178" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_map</text>
 </g>
 <!-- Node1&#45;&gt;Node36 -->
 <g id="edge150" class="edge">
 <title>Node1&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M1197.6716,-727.135C1207.5572,-713.8061 1226.2291,-689.5773 1244.5,-671 1422.5086,-490.0062 1462.9878,-432.8979 1680.5,-302 1702.7277,-288.6235 1857.3538,-229.064 1927.7426,-202.2092"/>
-<polygon fill="#191970" stroke="#191970" points="1928.9955,-205.4774 1937.0932,-198.6452 1926.5024,-198.9364 1928.9955,-205.4774"/>
+<path fill="none" stroke="#191970" d="M1055.8015,-717.594C1088.6583,-710.5601 1132.5832,-698.6238 1168.0178,-680 1402.2519,-556.8907 1386.4621,-410.0389 1628.0178,-302 1723.6706,-259.218 1770.2455,-318.3464 1861.0178,-266 1884.7274,-252.3272 1902.9566,-225.8099 1913.333,-207.756"/>
+<polygon fill="#191970" stroke="#191970" points="1916.5413,-209.1842 1918.2747,-198.7321 1910.4016,-205.8219 1916.5413,-209.1842"/>
 </g>
 <!-- Node42 -->
 <g id="node34" class="node">
 <title>Node42</title>
 <g id="a_node34"><a xlink:href="map_8h.html" target="_top" xlink:title="Runtime Map container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="911.5,-302.5 911.5,-332.5 1037.5,-332.5 1037.5,-302.5 911.5,-302.5"/>
-<text text-anchor="start" x="919.5" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="974.5" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/map.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1122.0178,-302.5 1122.0178,-332.5 1248.0178,-332.5 1248.0178,-302.5 1122.0178,-302.5"/>
+<text text-anchor="start" x="1130.0178" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="1185.0178" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/map.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node42 -->
 <g id="edge147" class="edge">
 <title>Node1&#45;&gt;Node42</title>
-<path fill="none" stroke="#191970" d="M1142.8226,-734.1703C1068.0493,-729.0186 927.3101,-716.2322 884.5,-691 857.4664,-675.0665 854.1741,-663.7064 841.5,-635 805.7327,-553.9885 810.0391,-514.2507 851.5,-436 868.7722,-403.4015 874.1096,-394.7786 900.5,-369 912.2357,-357.5364 926.7661,-346.8269 939.8355,-338.1756"/>
-<polygon fill="#191970" stroke="#191970" points="942.0865,-340.889 948.5979,-332.5312 938.2957,-335.0042 942.0865,-340.889"/>
+<path fill="none" stroke="#191970" d="M1007.5477,-716.4019C1005.8645,-671.4137 1004.5155,-478.9961 1097.0178,-369 1108.0568,-355.8734 1123.3454,-345.3058 1138.0855,-337.2406"/>
+<polygon fill="#191970" stroke="#191970" points="1139.8825,-340.2515 1147.1496,-332.5418 1136.6608,-334.0369 1139.8825,-340.2515"/>
 </g>
 <!-- Node51 -->
 <g id="node38" class="node">
 <title>Node51</title>
 <g id="a_node38"><a xlink:href="ir_2function_8h.html" target="_top" xlink:title="Function nodes. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1024.5,-671.5 1024.5,-690.5 1122.5,-690.5 1122.5,-671.5 1024.5,-671.5"/>
-<text text-anchor="middle" x="1073.5" y="-678.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/function.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1036.0178,-660.5 1036.0178,-679.5 1134.0178,-679.5 1134.0178,-660.5 1036.0178,-660.5"/>
+<text text-anchor="middle" x="1085.0178" y="-667.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/function.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node51 -->
 <g id="edge133" class="edge">
 <title>Node1&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1170.387,-727.3733C1151.6897,-718.4241 1123.6256,-704.9917 1102.5066,-694.8835"/>
-<polygon fill="#191970" stroke="#191970" points="1103.8817,-691.6614 1093.3506,-690.5011 1100.8595,-697.9755 1103.8817,-691.6614"/>
+<path fill="none" stroke="#191970" d="M1021.4302,-716.2455C1033.0235,-707.814 1049.9925,-695.4729 1063.4856,-685.6598"/>
+<polygon fill="#191970" stroke="#191970" points="1065.7304,-688.355 1071.7591,-679.6427 1061.6131,-682.6938 1065.7304,-688.355"/>
 </g>
 <!-- Node54 -->
 <g id="node39" class="node">
 <title>Node54</title>
 <g id="a_node39"><a xlink:href="source__map_8h.html" target="_top" xlink:title="A map from source names to source code. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="448,-436.5 448,-455.5 589,-455.5 589,-436.5 448,-436.5"/>
-<text text-anchor="middle" x="518.5" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/parser/source_map.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="332.5178,-436.5 332.5178,-455.5 473.5178,-455.5 473.5178,-436.5 332.5178,-436.5"/>
+<text text-anchor="middle" x="403.0178" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/parser/source_map.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node54 -->
 <g id="edge141" class="edge">
 <title>Node1&#45;&gt;Node54</title>
-<path fill="none" stroke="#191970" d="M1142.9356,-736.2111C1023.7632,-733.7516 715.0468,-724.265 618.5,-691 568.1529,-673.653 518.5,-678.2518 518.5,-625 518.5,-625 518.5,-625 518.5,-563.5 518.5,-529.1399 518.5,-489.128 518.5,-465.7764"/>
-<polygon fill="#191970" stroke="#191970" points="522.0001,-465.7489 518.5,-455.7489 515.0001,-465.749 522.0001,-465.7489"/>
+<path fill="none" stroke="#191970" d="M960.3066,-720.8187C912.2292,-714.6325 836.4246,-702.2184 774.0178,-680 724.1925,-662.261 608.5677,-596.5051 564.0178,-568 510.245,-533.5937 451.541,-486.4333 422.1064,-462.0528"/>
+<polygon fill="#191970" stroke="#191970" points="424.2021,-459.2433 414.2782,-455.5328 419.7222,-464.622 424.2021,-459.2433"/>
 </g>
 <!-- Node57 -->
 <g id="node40" class="node">
 <title>Node57</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1253.5,-671.5 1253.5,-690.5 1339.5,-690.5 1339.5,-671.5 1253.5,-671.5"/>
-<text text-anchor="middle" x="1296.5" y="-678.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_set</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="522.0178,-660.5 522.0178,-679.5 608.0178,-679.5 608.0178,-660.5 522.0178,-660.5"/>
+<text text-anchor="middle" x="565.0178" y="-667.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_set</text>
 </g>
 <!-- Node1&#45;&gt;Node57 -->
 <g id="edge151" class="edge">
 <title>Node1&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M1208.9638,-727.2455C1225.6074,-718.4527 1250.2993,-705.4079 1269.2192,-695.4125"/>
-<polygon fill="#191970" stroke="#191970" points="1271.0408,-698.4086 1278.2478,-690.6427 1267.7709,-692.2193 1271.0408,-698.4086"/>
+<path fill="none" stroke="#191970" d="M960.1881,-719.9538C876.7716,-709.4091 705.2896,-687.7319 618.2375,-676.7275"/>
+<polygon fill="#191970" stroke="#191970" points="618.4115,-673.2218 608.0515,-675.4399 617.5336,-680.1665 618.4115,-673.2218"/>
 </g>
 <!-- Node2&#45;&gt;Node3 -->
 <g id="edge3" class="edge">
 <title>Node2&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1883.8037,-676.2737C1963.2827,-666.204 2149.3364,-642.6316 2238.7707,-631.3005"/>
-<polygon fill="#191970" stroke="#191970" points="2239.4461,-634.743 2248.9268,-630.0138 2238.5662,-627.7986 2239.4461,-634.743"/>
+<path fill="none" stroke="#191970" d="M1819.1033,-664.7819C1890.9993,-654.6659 2049.3665,-632.3831 2130.2916,-620.9966"/>
+<polygon fill="#191970" stroke="#191970" points="2130.8225,-624.4565 2140.2373,-619.5973 2129.8471,-617.5248 2130.8225,-624.4565"/>
 </g>
 <!-- Node2&#45;&gt;Node49 -->
 <g id="edge126" class="edge">
 <title>Node2&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M1876.448,-671.4307C1916.8479,-658.537 1991.0606,-634.9054 2054.5,-615 2097.1595,-601.6147 2146.0475,-586.4714 2179.6628,-576.0892"/>
-<polygon fill="#191970" stroke="#191970" points="2181.0585,-579.3214 2189.5811,-573.0272 2178.9936,-572.6329 2181.0585,-579.3214"/>
+<path fill="none" stroke="#191970" d="M1809.4713,-660.4509C1867.3357,-640.3242 2001.8509,-593.5363 2067.1451,-570.8253"/>
+<polygon fill="#191970" stroke="#191970" points="2068.3386,-574.1159 2076.6337,-567.5249 2066.0389,-567.5044 2068.3386,-574.1159"/>
 </g>
 <!-- Node5 -->
 <g id="node6" class="node">
 <title>Node5</title>
 <g id="a_node6"><a xlink:href="node_8h.html" target="_top" xlink:title="Definitions and helper macros for IR/AST nodes. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2171,-492.5 2171,-511.5 2270,-511.5 2270,-492.5 2171,-492.5"/>
-<text text-anchor="middle" x="2220.5" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/node.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2198.5178,-492.5 2198.5178,-511.5 2297.5178,-511.5 2297.5178,-492.5 2198.5178,-492.5"/>
+<text text-anchor="middle" x="2248.0178" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/node.h</text>
 </a>
 </g>
 </g>
 <!-- Node2&#45;&gt;Node5 -->
 <g id="edge127" class="edge">
 <title>Node2&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M1862.3493,-671.472C1901.0801,-648.4186 2003.9879,-588.5452 2094.5,-548 2122.9242,-535.2673 2156.0398,-523.2832 2181.1865,-514.7409"/>
-<polygon fill="#191970" stroke="#191970" points="2182.3855,-518.0304 2190.7498,-511.5273 2180.1558,-511.395 2182.3855,-518.0304"/>
+<path fill="none" stroke="#191970" d="M1798.0233,-660.408C1836.7008,-637.6252 1938.8668,-579.7944 2031.0178,-548 2082.8609,-530.1128 2144.1753,-517.8916 2188.381,-510.5678"/>
+<polygon fill="#191970" stroke="#191970" points="2189.0764,-514.0009 2198.3872,-508.9452 2187.9558,-507.0911 2189.0764,-514.0009"/>
 </g>
 <!-- Node2&#45;&gt;Node21 -->
 <g id="edge128" class="edge">
 <title>Node2&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1827.402,-671.4641C1730.4574,-623.0584 1292.1771,-404.2193 1157.7944,-337.1202"/>
-<polygon fill="#191970" stroke="#191970" points="1159.1632,-333.8916 1148.6529,-332.5557 1156.0361,-340.1544 1159.1632,-333.8916"/>
+<path fill="none" stroke="#191970" d="M1744.9178,-661.2349C1657.8928,-640.165 1434.5046,-583.1105 1256.0178,-512 1124.7335,-459.6953 1100.2108,-428.4804 972.0178,-369 947.6449,-357.6912 920.1092,-345.8961 897.4696,-336.4518"/>
+<polygon fill="#191970" stroke="#191970" points="898.6679,-333.1597 888.0902,-332.5565 895.983,-339.6243 898.6679,-333.1597"/>
 </g>
 <!-- Node2&#45;&gt;Node10 -->
 <g id="edge130" class="edge">
 <title>Node2&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M1883.768,-673.1157C1953.4481,-658.7416 2108.3728,-628.5138 2240.5,-615 2293.0416,-609.6261 3153.8676,-617.0468 3190.5,-579 3299.0332,-466.2761 3210.2823,-359.5088 3115.5,-235 3068.4852,-173.24 3049.7213,-160.0209 2981.5,-123 2956.6425,-109.5108 2926.9751,-97.9641 2903.6064,-89.8132"/>
-<polygon fill="#191970" stroke="#191970" points="2904.6038,-86.4554 2894.0092,-86.5321 2902.3392,-93.079 2904.6038,-86.4554"/>
+<path fill="none" stroke="#191970" d="M1819.4413,-665.7861C1836.9217,-663.8883 1858.0217,-661.6979 1877.0178,-660 2154.6617,-635.1842 2864.8814,-659.9847 3128.0178,-568 3222.2921,-535.0445 3394.917,-347.0787 3401.0178,-333 3444.36,-232.98 3052.1885,-124.4029 2911.256,-88.9307"/>
+<polygon fill="#191970" stroke="#191970" points="2912.1029,-85.5348 2901.5525,-86.5042 2910.4046,-92.3257 2912.1029,-85.5348"/>
 </g>
 <!-- Node2&#45;&gt;Node16 -->
 <g id="edge131" class="edge">
 <title>Node2&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1809.3217,-676.788C1718.2317,-665.9289 1477.1443,-633.9205 1282.5,-579 1038.7568,-510.2258 883.2917,-558.751 768.5,-333 713.3859,-224.6118 724.3827,-122.6438 832.5,-67 883.6567,-40.6716 1054.8201,-24.0241 1126.9791,-18.0195"/>
-<polygon fill="#191970" stroke="#191970" points="1127.5973,-21.4808 1137.2795,-17.1791 1127.028,-14.504 1127.5973,-21.4808"/>
+<path fill="none" stroke="#191970" d="M1744.921,-668.5932C1566.0755,-661.5302 797.9102,-627.8857 711.0178,-568 544.7257,-453.3925 639.0356,-240.8888 803.0178,-123 915.0329,-42.471 1085.2411,-22.1859 1155.7164,-17.1413"/>
+<polygon fill="#191970" stroke="#191970" points="1156.0121,-20.6295 1165.76,-16.4788 1155.5513,-13.6447 1156.0121,-20.6295"/>
 </g>
 <!-- Node2&#45;&gt;Node33 -->
 <g id="edge129" class="edge">
 <title>Node2&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M1840.9908,-671.1995C1810.703,-617.3197 1664.1544,-356.6207 1618.2667,-274.99"/>
-<polygon fill="#191970" stroke="#191970" points="1621.1355,-272.9507 1613.1843,-265.9486 1615.0335,-276.3809 1621.1355,-272.9507"/>
+<path fill="none" stroke="#191970" d="M1765.6842,-660.4143C1715.1248,-629.7932 1560.3746,-528.7508 1490.0178,-400 1475.43,-373.3049 1479.1455,-363.2597 1476.0178,-333 1474.6012,-319.2952 1467.353,-312.7121 1476.0178,-302 1496.8546,-276.2401 1580.6888,-262.7995 1643.6681,-256.2005"/>
+<polygon fill="#191970" stroke="#191970" points="1644.3795,-259.6466 1653.9779,-255.1611 1643.6772,-252.6819 1644.3795,-259.6466"/>
 </g>
 <!-- Node3&#45;&gt;Node49 -->
 <g id="edge4" class="edge">
 <title>Node3&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M2277.875,-615.3906C2267.4569,-605.9683 2251.3636,-591.4134 2238.8696,-580.1137"/>
-<polygon fill="#191970" stroke="#191970" points="2241.0392,-577.3568 2231.2748,-573.2449 2236.3438,-582.5484 2241.0392,-577.3568"/>
+<path fill="none" stroke="#191970" d="M2166.7796,-604.2455C2155.3368,-595.814 2138.5882,-583.4729 2125.2703,-573.6598"/>
+<polygon fill="#191970" stroke="#191970" points="2127.2311,-570.757 2117.1043,-567.6427 2123.0786,-576.3924 2127.2311,-570.757"/>
 </g>
 <!-- Node3&#45;&gt;Node5 -->
 <g id="edge119" class="edge">
 <title>Node3&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M2287.873,-615.3426C2286.5129,-600.0781 2282.2955,-569.8663 2269.5,-548 2262.7543,-536.4722 2252.2443,-526.0565 2242.685,-518.0631"/>
-<polygon fill="#191970" stroke="#191970" points="2244.6593,-515.162 2234.6477,-511.6952 2240.3122,-520.6486 2244.6593,-515.162"/>
+<path fill="none" stroke="#191970" d="M2185.8154,-604.4509C2197.157,-585.7707 2222.4426,-544.1238 2237.043,-520.0761"/>
+<polygon fill="#191970" stroke="#191970" points="2240.0368,-521.8892 2242.2348,-511.5249 2234.0532,-518.2563 2240.0368,-521.8892"/>
 </g>
 <!-- Node22 -->
 <g id="node10" class="node">
 <title>Node22</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1665.5,-123.5 1665.5,-142.5 1729.5,-142.5 1729.5,-123.5 1665.5,-123.5"/>
-<text text-anchor="middle" x="1697.5" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">algorithm</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1702.0178,-123.5 1702.0178,-142.5 1766.0178,-142.5 1766.0178,-123.5 1702.0178,-123.5"/>
+<text text-anchor="middle" x="1734.0178" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">algorithm</text>
 </g>
 <!-- Node3&#45;&gt;Node22 -->
 <g id="edge122" class="edge">
 <title>Node3&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M2248.7593,-616.349C2224.3552,-609.4568 2193.6403,-597.7741 2171.5,-579 2100.8082,-519.056 2089.1674,-489.3428 2064.5,-400 2052.9081,-358.0153 2051.4832,-343.565 2064.5,-302 2070.3216,-283.4106 2083.6784,-284.5894 2089.5,-266 2101.0558,-229.1005 2115.1988,-207.8908 2089.5,-179 2066.5135,-153.1584 1835.9737,-139.4663 1739.5498,-134.8329"/>
-<polygon fill="#191970" stroke="#191970" points="1739.7139,-131.3368 1729.5603,-134.3623 1739.3845,-138.329 1739.7139,-131.3368"/>
+<path fill="none" stroke="#191970" d="M2178.1427,-604.4625C2175.0138,-590.6436 2167.5822,-564.866 2153.0178,-548 2068.627,-450.2729 1966.9672,-512.1734 1903.0178,-400 1886.9028,-371.7328 1926.6677,-302.3092 1927.0178,-302 1981.9771,-253.4648 2042.1755,-323.2248 2088.0178,-266 2112.1927,-235.8225 2113.5369,-208.0497 2088.0178,-179 2067.7063,-155.8784 1865.7892,-140.9435 1776.3175,-135.4112"/>
+<polygon fill="#191970" stroke="#191970" points="1776.4309,-131.9117 1766.2369,-134.7983 1776.0061,-138.8988 1776.4309,-131.9117"/>
 </g>
 <!-- Node3&#45;&gt;Node10 -->
 <g id="edge121" class="edge">
 <title>Node3&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M2328.0874,-624.4282C2500.5906,-621.7596 3179.4882,-609.3556 3212.5,-579 3325.0492,-475.5068 3288.71,-365.1711 3208.5,-235 3166.3335,-166.5688 3141.1436,-152.8189 3066.5,-123 3023.6533,-105.8835 2973.0498,-94.3981 2933.085,-87.1846"/>
-<polygon fill="#191970" stroke="#191970" points="2933.6196,-83.725 2923.1648,-85.4419 2932.4084,-90.6194 2933.6196,-83.725"/>
+<path fill="none" stroke="#191970" d="M2219.6534,-613.6594C2375.9251,-612.0676 2944.3525,-603.8429 3016.0178,-568 3102.9987,-524.4971 3129.754,-463.5581 3107.0178,-369 3076.2403,-240.9988 3046.1713,-203.5208 2942.0178,-123 2926.273,-110.8277 2906.972,-99.6375 2891.2731,-91.3423"/>
+<polygon fill="#191970" stroke="#191970" points="2892.5191,-88.0466 2882.0278,-86.5686 2889.3075,-94.2665 2892.5191,-88.0466"/>
 </g>
 <!-- Node3&#45;&gt;Node16 -->
 <g id="edge124" class="edge">
 <title>Node3&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2248.896,-624.5051C2099.0682,-622.3915 1567.0639,-612.6323 1401.5,-579 1244.8398,-547.1763 1213.6713,-510.8063 1063.5,-456 939.2465,-410.6526 874.8697,-436.4917 792.5,-333 716.0078,-236.893 758.5482,-123.7196 867.5,-67 912.3741,-43.6388 1060.4796,-25.7908 1126.8394,-18.7619"/>
-<polygon fill="#191970" stroke="#191970" points="1127.5469,-22.2072 1137.1303,-17.6895 1126.8213,-15.2449 1127.5469,-22.2072"/>
+<path fill="none" stroke="#191970" d="M2201.3834,-604.3807C2231.4634,-589.5038 2285.0723,-557.7726 2307.0178,-512 2350.2517,-421.8251 2325.944,-118.1603 2240.0178,-67 2195.1701,-40.2978 1393.2876,-20.2555 1220.6081,-16.2379"/>
+<polygon fill="#191970" stroke="#191970" points="1220.4582,-12.7336 1210.38,-16.0017 1220.2964,-19.7318 1220.4582,-12.7336"/>
 </g>
 <!-- Node17 -->
 <g id="node21" class="node">
 <title>Node17</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="3251,-6 3251,-25 3320,-25 3320,-6 3251,-6"/>
-<text text-anchor="middle" x="3285.5" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">type_traits</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="3081.5178,-6 3081.5178,-25 3150.5178,-25 3150.5178,-6 3081.5178,-6"/>
+<text text-anchor="middle" x="3116.0178" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">type_traits</text>
 </g>
 <!-- Node3&#45;&gt;Node17 -->
 <g id="edge125" class="edge">
 <title>Node3&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M2328.1109,-624.0968C2503.1566,-619.9769 3200.4353,-602.166 3240.5,-579 3430.7471,-468.9961 3322.6058,-120.3002 3292.5184,-34.6431"/>
-<polygon fill="#191970" stroke="#191970" points="3295.7806,-33.3711 3289.1157,-25.1353 3289.19,-35.7298 3295.7806,-33.3711"/>
+<path fill="none" stroke="#191970" d="M2219.5752,-613.4497C2378.9559,-611.0224 2969.1585,-599.9382 3046.0178,-568 3246.3757,-484.7432 3149.2639,-122.9187 3122.2389,-34.8683"/>
+<polygon fill="#191970" stroke="#191970" points="3125.5135,-33.6131 3119.1847,-25.1162 3118.8334,-35.7052 3125.5135,-33.6131"/>
 </g>
 <!-- Node3&#45;&gt;Node33 -->
 <g id="edge120" class="edge">
 <title>Node3&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M2248.9912,-616.4787C2216.8114,-608.8052 2170.6295,-596.1031 2132.5,-579 1928.5751,-487.5289 1709.1786,-329.1734 1632.6922,-271.9277"/>
-<polygon fill="#191970" stroke="#191970" points="1634.5922,-268.9774 1624.4948,-265.7692 1630.3876,-274.574 1634.5922,-268.9774"/>
+<path fill="none" stroke="#191970" d="M2140.2644,-606.0032C2055.3435,-586.7371 1855.8593,-529.4354 1756.0178,-400 1727.7335,-363.332 1719.8612,-308.1278 1717.7334,-276.0656"/>
+<polygon fill="#191970" stroke="#191970" points="1721.212,-275.5625 1717.1963,-265.7583 1714.2215,-275.9269 1721.212,-275.5625"/>
 </g>
 <!-- Node45 -->
 <g id="node36" class="node">
 <title>Node45</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="3485.5,-308 3485.5,-327 3529.5,-327 3529.5,-308 3485.5,-308"/>
-<text text-anchor="middle" x="3507.5" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">limits</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="3348.0178,-308 3348.0178,-327 3392.0178,-327 3392.0178,-308 3348.0178,-308"/>
+<text text-anchor="middle" x="3370.0178" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">limits</text>
 </g>
 <!-- Node3&#45;&gt;Node45 -->
 <g id="edge123" class="edge">
 <title>Node3&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M2328.1785,-623.817C2519.1202,-618.0593 3335.1161,-592.691 3357.5,-579 3395.8517,-555.5424 3429.5084,-389.082 3443.5,-369 3453.635,-354.4534 3469.0184,-341.8904 3482.1631,-332.8148"/>
-<polygon fill="#191970" stroke="#191970" points="3484.3942,-335.5359 3490.8136,-327.1073 3480.5392,-329.6931 3484.3942,-335.5359"/>
+<path fill="none" stroke="#191970" d="M2219.6202,-613.3459C2384.1685,-610.4464 3009.1478,-597.5672 3092.0178,-568 3171.0521,-539.8013 3181.7016,-512.1985 3244.0178,-456 3288.2365,-416.1223 3333.6855,-362.4022 3355.9649,-335.0658"/>
+<polygon fill="#191970" stroke="#191970" points="3358.7853,-337.1444 3362.3534,-327.1684 3353.343,-332.7419 3358.7853,-337.1444"/>
 </g>
 <!-- Node49&#45;&gt;Node5 -->
 <g id="edge5" class="edge">
 <title>Node49&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M2220.5,-553.8906C2220.5,-545.3657 2220.5,-532.6392 2220.5,-521.9235"/>
-<polygon fill="#191970" stroke="#191970" points="2224.0001,-521.7448 2220.5,-511.7449 2217.0001,-521.7449 2224.0001,-521.7448"/>
+<path fill="none" stroke="#191970" d="M2128.7723,-548.3733C2152.3027,-539.2226 2187.8865,-525.3844 2214.0604,-515.2057"/>
+<polygon fill="#191970" stroke="#191970" points="2215.5349,-518.3877 2223.5863,-511.5011 2212.9977,-511.8636 2215.5349,-518.3877"/>
 </g>
 <!-- Node49&#45;&gt;Node21 -->
 <g id="edge115" class="edge">
 <title>Node49&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M2183.7663,-553.9783C2129.114,-539.9032 2023.0987,-512.9515 1932.5,-492 1660.0512,-428.9947 1334.4608,-361.5497 1191.6828,-332.3655"/>
-<polygon fill="#191970" stroke="#191970" points="1192.2201,-328.9031 1181.7219,-330.3309 1190.8192,-335.7614 1192.2201,-328.9031"/>
+<path fill="none" stroke="#191970" d="M2063.9576,-549.2353C1969.2193,-528.6005 1723.877,-475.7067 1518.0178,-436 1302.6335,-394.4561 1047.0561,-350.5485 924.1708,-329.7717"/>
+<polygon fill="#191970" stroke="#191970" points="924.5687,-326.2894 914.1254,-328.0748 923.4027,-333.1916 924.5687,-326.2894"/>
 </g>
 <!-- Node49&#45;&gt;Node10 -->
 <g id="edge117" class="edge">
 <title>Node49&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M2260.5898,-562.7367C2439.1719,-558.8171 3153.5,-537.4248 3153.5,-446 3153.5,-446 3153.5,-446 3153.5,-384.5 3153.5,-234.5624 2968.7578,-128.0795 2894.1771,-91.2012"/>
-<polygon fill="#191970" stroke="#191970" points="2895.3485,-87.8792 2884.8244,-86.6572 2892.2895,-94.1754 2895.3485,-87.8792"/>
+<path fill="none" stroke="#191970" d="M2144.2761,-555.1799C2321.3066,-542.5899 3023.1763,-490.6239 3054.0178,-456 3160.1128,-336.8934 2949.9591,-148.5699 2882.1404,-92.9596"/>
+<polygon fill="#191970" stroke="#191970" points="2884.228,-90.1463 2874.2555,-86.5683 2879.8201,-95.5843 2884.228,-90.1463"/>
 </g>
 <!-- Node49&#45;&gt;Node16 -->
 <g id="edge118" class="edge">
 <title>Node49&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2180.4077,-561.4248C1976.6467,-550.2122 1056.6023,-491.3974 827.5,-333 750.7071,-279.9067 767.4007,-189.9188 832.5,-123 861.2598,-93.4364 868.9127,-83.9803 906.5,-67 980.9014,-33.3888 1077.4996,-21.5356 1127.0489,-17.4936"/>
-<polygon fill="#191970" stroke="#191970" points="1127.4628,-20.9724 1137.1688,-16.7246 1126.9323,-13.9925 1127.4628,-20.9724"/>
+<path fill="none" stroke="#191970" d="M2063.7993,-557.4617C1861.6617,-553.8699 959.2595,-527.0265 779.0178,-333 732.2423,-282.6471 728.7312,-237.3665 765.0178,-179 808.647,-108.823 1065.1262,-43.8038 1156.0642,-22.6666"/>
+<polygon fill="#191970" stroke="#191970" points="1156.8594,-26.0751 1165.8191,-20.4205 1155.2887,-19.2536 1156.8594,-26.0751"/>
 </g>
 <!-- Node28 -->
 <g id="node24" class="node">
 <title>Node28</title>
 <g id="a_node24"><a xlink:href="data__type_8h.html" target="_top" xlink:title="tvm/runtime/data_type.h">
-<polygon fill="#ffffff" stroke="#000000" points="2833.5,-241 2833.5,-260 2971.5,-260 2971.5,-241 2833.5,-241"/>
-<text text-anchor="middle" x="2902.5" y="-248" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/data_type.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2832.0178,-241 2832.0178,-260 2970.0178,-260 2970.0178,-241 2832.0178,-241"/>
+<text text-anchor="middle" x="2901.0178" y="-248" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/data_type.h</text>
 </a>
 </g>
 </g>
 <!-- Node49&#45;&gt;Node28 -->
 <g id="edge116" class="edge">
 <title>Node49&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2260.6468,-560.0381C2399.3101,-547.6892 2851.9333,-503.9716 2894.5,-456 2941.1702,-403.4038 2920.7674,-310.1105 2908.8703,-269.8659"/>
-<polygon fill="#191970" stroke="#191970" points="2912.179,-268.7186 2905.8707,-260.2065 2905.4939,-270.7946 2912.179,-268.7186"/>
+<path fill="none" stroke="#191970" d="M2144.1273,-553.4143C2291.1097,-536.4646 2793.1194,-477.2413 2820.0178,-456 2879.5655,-408.976 2895.4516,-311.8839 2899.596,-270.2079"/>
+<polygon fill="#191970" stroke="#191970" points="2903.0847,-270.4892 2900.4644,-260.2235 2896.111,-269.8826 2903.0847,-270.4892"/>
 </g>
 <!-- Node5&#45;&gt;Node6 -->
 <g id="edge6" class="edge">
 <title>Node5&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M2270.1683,-498.444C2363.9696,-491.5591 2573.5641,-475.3682 2749.5,-456 2750.9689,-455.8383 2752.4536,-455.6715 2753.9498,-455.5003"/>
-<polygon fill="#191970" stroke="#191970" points="2754.4281,-458.9682 2763.9468,-454.3159 2753.6045,-452.0169 2754.4281,-458.9682"/>
+<path fill="none" stroke="#191970" d="M2297.5253,-496.4772C2387.0557,-486.4898 2576.135,-465.3973 2679.2727,-453.8919"/>
+<polygon fill="#191970" stroke="#191970" points="2679.733,-457.3623 2689.2833,-452.7752 2678.9569,-450.4055 2679.733,-457.3623"/>
 </g>
 <!-- Node7 -->
 <g id="node8" class="node">
 <title>Node7</title>
 <g id="a_node8"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2074,-369.5 2074,-399.5 2187,-399.5 2187,-369.5 2074,-369.5"/>
-<text text-anchor="start" x="2082" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
-<text text-anchor="middle" x="2130.5" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1912.5178,-369.5 1912.5178,-399.5 2025.5178,-399.5 2025.5178,-369.5 1912.5178,-369.5"/>
+<text text-anchor="start" x="1920.5178" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
+<text text-anchor="middle" x="1969.0178" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node7 -->
 <g id="edge106" class="edge">
 <title>Node5&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M2213.1349,-492.3845C2199.1038,-474.0661 2168.1794,-433.6925 2148.3953,-407.8633"/>
-<polygon fill="#191970" stroke="#191970" points="2151.0129,-405.5248 2142.1535,-399.7143 2145.4557,-409.7814 2151.0129,-405.5248"/>
+<path fill="none" stroke="#191970" d="M2225.1861,-492.3845C2179.1077,-472.9787 2074.2577,-428.8215 2013.9544,-403.4249"/>
+<polygon fill="#191970" stroke="#191970" points="2015.249,-400.1724 2004.6744,-399.5167 2012.532,-406.6237 2015.249,-400.1724"/>
 </g>
 <!-- Node5&#45;&gt;Node18 -->
 <g id="edge113" class="edge">
 <title>Node5&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2170.6179,-501.553C1885.4188,-498.8953 472.8429,-484.317 438.5,-456 380.995,-408.5849 389.2811,-196.9596 398.5,-123 401.6272,-97.9115 400.4962,-90.9823 408.5,-67 412.3123,-55.5769 418.2788,-43.5238 423.5351,-33.9521"/>
-<polygon fill="#191970" stroke="#191970" points="426.7231,-35.427 428.6355,-25.0062 420.642,-31.9599 426.7231,-35.427"/>
+<path fill="none" stroke="#191970" d="M2198.1034,-501.6223C1899.4981,-499.2639 361.1057,-485.686 323.0178,-456 248.2608,-397.7338 266.0178,-345.2816 266.0178,-250.5 266.0178,-250.5 266.0178,-250.5 266.0178,-133 266.0178,-79.3174 327.6261,-44.2794 366.9811,-27.3738"/>
+<polygon fill="#191970" stroke="#191970" points="368.546,-30.5145 376.4562,-23.4662 365.877,-24.0433 368.546,-30.5145"/>
 </g>
 <!-- Node5&#45;&gt;Node20 -->
 <g id="edge114" class="edge">
 <title>Node5&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M2170.7405,-494.1105C2089.2654,-481.2882 1922.2712,-455.4084 1780.5,-436 1540.4576,-403.1383 1480.186,-396.7575 1239.5,-369 1164.6809,-360.3714 965.4715,-374.3151 902.5,-333 859.7351,-304.9423 845.299,-241.0648 840.6228,-208.6503"/>
-<polygon fill="#191970" stroke="#191970" points="844.0792,-208.0857 839.3453,-198.6073 837.1352,-208.9691 844.0792,-208.0857"/>
+<path fill="none" stroke="#191970" d="M2198.2751,-496.9168C2064.8859,-483.1267 1688.182,-443.137 1376.0178,-400 1329.2059,-393.5312 998.4422,-353.8174 956.0178,-333 937.9524,-324.1354 939.5558,-313.4644 923.0178,-302 893.2874,-281.3906 877.2743,-289.785 850.0178,-266 831.1203,-249.5094 815.9958,-224.646 807.0323,-207.6635"/>
+<polygon fill="#191970" stroke="#191970" points="810.1299,-206.0331 802.4847,-198.6981 803.887,-209.1997 810.1299,-206.0331"/>
 </g>
 <!-- Node25 -->
 <g id="node17" class="node">
 <title>Node25</title>
 <g id="a_node17"><a xlink:href="runtime_2memory_8h.html" target="_top" xlink:title="Runtime memory management. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2300,-123.5 2300,-142.5 2429,-142.5 2429,-123.5 2300,-123.5"/>
-<text text-anchor="middle" x="2364.5" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/memory.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2102.5178,-123.5 2102.5178,-142.5 2231.5178,-142.5 2231.5178,-123.5 2102.5178,-123.5"/>
+<text text-anchor="middle" x="2167.0178" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/memory.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node25 -->
 <g id="edge109" class="edge">
 <title>Node5&#45;&gt;Node25</title>
-<path fill="none" stroke="#191970" d="M2270.1565,-496.1509C2338.5245,-486.1064 2463.0935,-460.4782 2550.5,-400 2581.4974,-378.5523 2590.2854,-368.6601 2602.5,-333 2636.1431,-234.7807 2484.844,-171.243 2407.5449,-145.7237"/>
-<polygon fill="#191970" stroke="#191970" points="2408.1958,-142.2561 2397.6042,-142.5166 2406.0465,-148.918 2408.1958,-142.2561"/>
+<path fill="none" stroke="#191970" d="M2248.1487,-492.2393C2248.2891,-470.1316 2247.7629,-414.6111 2240.0178,-369 2229.6544,-307.9697 2220.5148,-294.0759 2202.0178,-235 2192.9133,-205.9222 2181.2392,-172.5912 2173.9424,-152.1672"/>
+<polygon fill="#191970" stroke="#191970" points="2177.1466,-150.7341 2170.4727,-142.5055 2170.5586,-153.1 2177.1466,-150.7341"/>
 </g>
 <!-- Node5&#45;&gt;Node10 -->
 <g id="edge110" class="edge">
 <title>Node5&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M2270.1601,-501.5165C2417.2498,-499.6849 2843.5915,-491.3503 2894.5,-456 2981.0723,-395.8852 3015.5187,-334.4094 2980.5,-235 2963.0651,-185.5065 2937.3053,-185.4807 2906.5,-143 2895.1299,-127.3205 2883.1798,-108.7899 2874.7713,-95.3615"/>
-<polygon fill="#191970" stroke="#191970" points="2877.5804,-93.2504 2869.3335,-86.5991 2871.6326,-96.9415 2877.5804,-93.2504"/>
+<path fill="none" stroke="#191970" d="M2297.6575,-499.8269C2427.8979,-493.8692 2772.6509,-476.3581 2820.0178,-456 2925.4937,-410.6669 2965.7297,-377.0065 2995.0178,-266 2998.5327,-252.6781 3001.1388,-247.3434 2995.0178,-235 2987.5144,-219.869 2882.8497,-157.396 2874.0178,-143 2865.5487,-129.1954 2862.7397,-110.9131 2861.9405,-97.0914"/>
+<polygon fill="#191970" stroke="#191970" points="2865.4295,-96.6287 2861.6619,-86.7264 2858.432,-96.8169 2865.4295,-96.6287"/>
 </g>
 <!-- Node11 -->
 <g id="node19" class="node">
 <title>Node11</title>
 <g id="a_node19"><a xlink:href="c__runtime__api_8h.html" target="_top" xlink:title="tvm/runtime/c_runtime\l_api.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="3544,-.5 3544,-30.5 3673,-30.5 3673,-.5 3544,-.5"/>
-<text text-anchor="start" x="3552" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/c_runtime</text>
-<text text-anchor="middle" x="3608.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_api.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="3326.5178,-.5 3326.5178,-30.5 3455.5178,-30.5 3455.5178,-.5 3326.5178,-.5"/>
+<text text-anchor="start" x="3334.5178" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/c_runtime</text>
+<text text-anchor="middle" x="3391.0178" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_api.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node11 -->
 <g id="edge108" class="edge">
 <title>Node5&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M2270.0626,-501.108C2412.7208,-498.1891 2822.4197,-487.2454 2952.5,-456 3086.2475,-423.8738 3115.4167,-398.2635 3236.5,-333 3372.14,-259.8904 3418.0054,-252.4172 3526.5,-143 3557.3624,-111.8752 3582.7745,-67.2844 3596.7655,-39.9224"/>
-<polygon fill="#191970" stroke="#191970" points="3600.0464,-41.187 3601.3923,-30.678 3593.7866,-38.054 3600.0464,-41.187"/>
+<path fill="none" stroke="#191970" d="M2297.783,-499.9159C2428.4271,-494.2104 2778.9424,-477.3301 2893.0178,-456 3114.8964,-414.5126 3180.6264,-397.6013 3364.0178,-266 3428.1186,-220.0014 3445.0746,-206.4123 3492.0178,-143 3514.0348,-113.2588 3547.0087,-97.474 3526.0178,-67 3517.6119,-54.7966 3488.2119,-42.8883 3458.9856,-33.6019"/>
+<polygon fill="#191970" stroke="#191970" points="3459.6988,-30.1592 3449.1114,-30.5564 3457.6356,-36.8483 3459.6988,-30.1592"/>
 </g>
 <!-- Node5&#45;&gt;Node16 -->
 <g id="edge111" class="edge">
 <title>Node5&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2170.6928,-498.5018C1983.195,-485.1041 1326.1171,-436.0049 1239.5,-400 1219.3736,-391.6339 1221.6079,-377.4104 1201.5,-369 1064.5744,-311.7286 990.5874,-418.8282 869.5,-333 809.0302,-290.1383 765.5613,-241.439 805.5,-179 878.6864,-64.5825 1054.5911,-29.083 1127.2564,-19.0795"/>
-<polygon fill="#191970" stroke="#191970" points="1127.7405,-22.5461 1137.2039,-17.7822 1126.8352,-15.6049 1127.7405,-22.5461"/>
+<path fill="none" stroke="#191970" d="M2198.4532,-495.4202C1986.3168,-467.1561 1163.3343,-356.4242 1113.0178,-333 1025.3834,-292.2028 950.0178,-285.6654 950.0178,-189 950.0178,-189 950.0178,-189 950.0178,-133 950.0178,-87.6191 1090.9031,-42.5143 1155.8833,-24.1243"/>
+<polygon fill="#191970" stroke="#191970" points="1156.9355,-27.4645 1165.6273,-21.4062 1155.0546,-20.7219 1156.9355,-27.4645"/>
 </g>
 <!-- Node5&#45;&gt;Node17 -->
 <g id="edge112" class="edge">
 <title>Node5&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M2270.265,-500.854C2420.4207,-497.102 2862.2437,-483.7981 2920.5,-456 3122.0365,-359.8329 3147.6664,-279.112 3261.5,-87 3271.0742,-70.8421 3277.454,-50.3442 3281.2134,-35.4423"/>
-<polygon fill="#191970" stroke="#191970" points="3284.7156,-35.8366 3283.5869,-25.3021 3277.8999,-34.2412 3284.7156,-35.8366"/>
+<path fill="none" stroke="#191970" d="M2297.9334,-501.1511C2423.8303,-498.5427 2751.6129,-488.7204 2855.0178,-456 3019.8562,-403.8402 3017.9735,-301.9412 3086.0178,-143 3096.5419,-118.4174 3101.5867,-112.9558 3108.0178,-87 3112.237,-69.9714 3114.2346,-50.1058 3115.1783,-35.642"/>
+<polygon fill="#191970" stroke="#191970" points="3118.6892,-35.5314 3115.7315,-25.3578 3111.6993,-35.1554 3118.6892,-35.5314"/>
 </g>
 <!-- Node29 -->
 <g id="node25" class="node">
 <title>Node29</title>
 <g id="a_node25"><a xlink:href="structural__hash_8h.html" target="_top" xlink:title="tvm/node/structural\l_hash.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="2428,-369.5 2428,-399.5 2541,-399.5 2541,-369.5 2428,-369.5"/>
-<text text-anchor="start" x="2436" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
-<text text-anchor="middle" x="2484.5" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_hash.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2608.5178,-369.5 2608.5178,-399.5 2721.5178,-399.5 2721.5178,-369.5 2608.5178,-369.5"/>
+<text text-anchor="start" x="2616.5178" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
+<text text-anchor="middle" x="2665.0178" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_hash.h</text>
 </a>
 </g>
 </g>
 <!-- Node5&#45;&gt;Node29 -->
 <g id="edge107" class="edge">
 <title>Node5&#45;&gt;Node29</title>
-<path fill="none" stroke="#191970" d="M2242.1042,-492.3845C2285.5258,-473.0586 2384.1025,-429.1845 2441.2727,-403.7394"/>
-<polygon fill="#191970" stroke="#191970" points="2443.0476,-406.7805 2450.7604,-399.5167 2440.2012,-400.3853 2443.0476,-406.7805"/>
+<path fill="none" stroke="#191970" d="M2281.7894,-492.484C2351.4984,-472.8418 2512.2691,-427.5407 2601.8442,-402.3007"/>
+<polygon fill="#191970" stroke="#191970" points="2602.9334,-405.6302 2611.6093,-399.5492 2601.0348,-398.8925 2602.9334,-405.6302"/>
 </g>
 <!-- Node6&#45;&gt;Node7 -->
 <g id="edge7" class="edge">
 <title>Node6&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M2763.9607,-444.7317C2653.2967,-441.7238 2410.8955,-431.947 2197.414,-399.9711"/>
-<polygon fill="#191970" stroke="#191970" points="2197.6827,-396.4718 2187.2707,-398.4315 2196.6323,-403.3926 2197.6827,-396.4718"/>
+<path fill="none" stroke="#191970" d="M2689.3081,-443.3967C2544.1322,-436.9666 2179.0065,-419.5321 2036.0888,-399.9389"/>
+<polygon fill="#191970" stroke="#191970" points="2036.1786,-396.4162 2025.7836,-398.464 2035.1868,-403.3456 2036.1786,-396.4162"/>
 </g>
 <!-- Node6&#45;&gt;Node20 -->
 <g id="edge105" class="edge">
 <title>Node6&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M2763.9588,-443.4128C2565.5803,-434.4732 1917.3684,-401.6013 1385.5,-333 1384.7677,-332.9055 966.1932,-266.2543 965.5,-266 924.7527,-251.0515 882.6825,-222.4704 858.6461,-204.6474"/>
-<polygon fill="#191970" stroke="#191970" points="860.7343,-201.8385 850.6424,-198.613 856.5201,-207.4279 860.7343,-201.8385"/>
+<path fill="none" stroke="#191970" d="M2689.3953,-443.1554C2550.3292,-436.5318 2197.6613,-419.1772 1903.0178,-400 1804.8324,-393.6095 1114.7747,-359.5015 1020.0178,-333 1012.0978,-330.7849 894.8573,-270.5666 888.0178,-266 880.5218,-260.9951 841.0785,-226.7285 816.9639,-205.629"/>
+<polygon fill="#191970" stroke="#191970" points="818.9792,-202.7414 809.151,-198.7845 814.3665,-208.0067 818.9792,-202.7414"/>
 </g>
 <!-- Node6&#45;&gt;Node25 -->
 <g id="edge69" class="edge">
 <title>Node6&#45;&gt;Node25</title>
-<path fill="none" stroke="#191970" d="M2821.8871,-436.2062C2814.6275,-410.7395 2791.9023,-341.6707 2749.5,-302 2653.2698,-211.9689 2501.3027,-164.8733 2420.0637,-144.9297"/>
-<polygon fill="#191970" stroke="#191970" points="2420.5792,-141.454 2410.038,-142.5185 2418.9424,-148.2599 2420.5792,-141.454"/>
+<path fill="none" stroke="#191970" d="M2749.6908,-436.4929C2747.9757,-402.7639 2737.2979,-288.0245 2671.0178,-235 2605.6709,-182.722 2365.729,-152.3097 2241.9656,-139.7746"/>
+<polygon fill="#191970" stroke="#191970" points="2242.1082,-136.2715 2231.8097,-138.7598 2241.4121,-143.2368 2242.1082,-136.2715"/>
 </g>
 <!-- Node6&#45;&gt;Node10 -->
 <g id="edge71" class="edge">
 <title>Node6&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M2838.0181,-436.1627C2870.2587,-411.7509 2951.1601,-344.6524 2980.5,-266 2985.3154,-253.0911 2984.817,-248.084 2980.5,-235 2960.9348,-175.7017 2909.5839,-120.4678 2881.8677,-93.7714"/>
-<polygon fill="#191970" stroke="#191970" points="2884.0778,-91.0444 2874.4042,-86.7233 2879.2717,-96.1339 2884.0778,-91.0444"/>
+<path fill="none" stroke="#191970" d="M2773.2865,-436.3698C2822.405,-414.583 2935.7103,-356.2752 2979.0178,-266 2984.9771,-253.5777 2986.0881,-246.8253 2979.0178,-235 2961.6706,-205.9864 2940.5857,-217.0718 2912.0178,-199 2876.5598,-176.5696 2854.9404,-180.4477 2836.0178,-143 2827.9989,-127.1306 2837.2942,-108.1691 2846.925,-94.6505"/>
+<polygon fill="#191970" stroke="#191970" points="2849.7718,-96.6906 2853.1236,-86.64 2844.2357,-92.4067 2849.7718,-96.6906"/>
 </g>
 <!-- Node6&#45;&gt;Node11 -->
 <g id="edge67" class="edge">
 <title>Node6&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M2852.3382,-436.4031C2876.7297,-427.7206 2912.9546,-414.1585 2943.5,-400 3207.1304,-277.801 3274.8596,-246.8725 3517.5,-87 3540.7787,-71.662 3565.5554,-51.9466 3583.3681,-37.1045"/>
-<polygon fill="#191970" stroke="#191970" points="3585.6299,-39.7756 3591.0318,-30.6613 3581.1252,-34.4176 3585.6299,-39.7756"/>
+<path fill="none" stroke="#191970" d="M2810.6302,-439.3483C2962.0153,-419.0532 3355.5688,-342.7637 3510.0178,-87 3525.4966,-61.3675 3497.9255,-43.7745 3465.4431,-32.4548"/>
+<polygon fill="#191970" stroke="#191970" points="3466.3094,-29.056 3455.717,-29.2843 3464.1398,-35.7113 3466.3094,-29.056"/>
 </g>
 <!-- Node6&#45;&gt;Node16 -->
 <g id="edge103" class="edge">
 <title>Node6&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2763.5881,-440.1281C2651.9013,-429.2198 2425.1777,-406.3591 2419.5,-400 2378.4069,-353.9753 2476.875,-171.3143 2438.5,-123 2370.1065,-36.8921 2306.405,-82.2298 2197.5,-67 1996.1671,-38.8446 1345.009,-20.3528 1191.933,-16.3275"/>
-<polygon fill="#191970" stroke="#191970" points="1191.8733,-12.8249 1181.7853,-16.0628 1191.6906,-19.8225 1191.8733,-12.8249"/>
+<path fill="none" stroke="#191970" d="M2689.2941,-436.9933C2590.3779,-420.9861 2401.3157,-384.3688 2360.0178,-333 2316.5712,-278.9585 2353.8684,-245.729 2335.0178,-179 2327.7031,-153.107 2324.1477,-147.017 2312.0178,-123 2298.8913,-97.0097 2303.0488,-81.8745 2278.0178,-67 2231.4075,-39.3021 1396.3,-19.9542 1220.3795,-16.1754"/>
+<polygon fill="#191970" stroke="#191970" points="1220.2822,-12.6727 1210.2099,-15.9586 1220.133,-19.6711 1220.2822,-12.6727"/>
 </g>
 <!-- Node6&#45;&gt;Node17 -->
 <g id="edge104" class="edge">
 <title>Node6&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M2846.2832,-436.3362C2864.2525,-427.9073 2890.0271,-414.7431 2910.5,-400 3045.4626,-302.8097 3062.23,-257.8737 3182.5,-143 3208.4119,-118.2507 3217.9038,-114.8094 3240.5,-87 3254.157,-70.1921 3267.0303,-48.9868 3275.4805,-34.0483"/>
-<polygon fill="#191970" stroke="#191970" points="3278.5727,-35.689 3280.3586,-25.2458 3272.45,-32.296 3278.5727,-35.689"/>
+<path fill="none" stroke="#191970" d="M2788.028,-436.4398C2814.606,-428.8193 2850.3342,-416.6565 2879.0178,-400 2918.5378,-377.0508 2927.9802,-368.289 2957.0178,-333 3038.2013,-234.3388 3092.1495,-87.3235 3109.7498,-34.917"/>
+<polygon fill="#191970" stroke="#191970" points="3113.1392,-35.8146 3112.9505,-25.2214 3106.492,-33.6201 3113.1392,-35.8146"/>
 </g>
 <!-- Node6&#45;&gt;Node28 -->
 <g id="edge68" class="edge">
 <title>Node6&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2828.368,-436.3051C2840.5842,-405.6863 2878.4981,-310.6587 2894.8576,-269.6551"/>
-<polygon fill="#191970" stroke="#191970" points="2898.1537,-270.8385 2898.6086,-260.2534 2891.652,-268.2444 2898.1537,-270.8385"/>
+<path fill="none" stroke="#191970" d="M2757.5059,-436.3051C2781.4063,-405.3613 2856.1164,-308.6339 2887.2172,-268.3677"/>
+<polygon fill="#191970" stroke="#191970" points="2890.1417,-270.3071 2893.4845,-260.2534 2884.6017,-266.0281 2890.1417,-270.3071"/>
 </g>
 <!-- Node6&#45;&gt;Node29 -->
 <g id="edge36" class="edge">
 <title>Node6&#45;&gt;Node29</title>
-<path fill="none" stroke="#191970" d="M2771.748,-436.4581C2712.3475,-425.7136 2615.71,-408.2336 2551.1392,-396.5539"/>
-<polygon fill="#191970" stroke="#191970" points="2551.5253,-393.067 2541.062,-394.7311 2550.2793,-399.9552 2551.5253,-393.067"/>
+<path fill="none" stroke="#191970" d="M2736.7365,-436.3906C2725.3393,-428.1444 2708.5086,-415.9669 2694.0155,-405.4807"/>
+<polygon fill="#191970" stroke="#191970" points="2695.9646,-402.5709 2685.8111,-399.5446 2691.8612,-408.2421 2695.9646,-402.5709"/>
 </g>
 <!-- Node30 -->
 <g id="node26" class="node">
 <title>Node30</title>
 <g id="a_node26"><a xlink:href="ndarray_8h.html" target="_top" xlink:title="A device&#45;independent managed NDArray abstraction. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2468,-308 2468,-327 2593,-327 2593,-308 2468,-308"/>
-<text text-anchor="middle" x="2530.5" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/ndarray.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2368.5178,-308 2368.5178,-327 2493.5178,-327 2493.5178,-308 2368.5178,-308"/>
+<text text-anchor="middle" x="2431.0178" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/ndarray.h</text>
 </a>
 </g>
 </g>
 <!-- Node6&#45;&gt;Node30 -->
 <g id="edge70" class="edge">
 <title>Node6&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M2802.4022,-436.3416C2750.6641,-413.7282 2621.5359,-357.2895 2561.7653,-331.1653"/>
-<polygon fill="#191970" stroke="#191970" points="2563.0179,-327.8931 2552.4532,-327.0952 2560.2144,-334.3072 2563.0179,-327.8931"/>
+<path fill="none" stroke="#191970" d="M2713.0951,-436.417C2682.3907,-428.0363 2637.7704,-414.8975 2600.0178,-400 2548.0509,-379.4935 2490.0516,-349.5428 2457.3004,-331.9342"/>
+<polygon fill="#191970" stroke="#191970" points="2458.8303,-328.7825 2448.3697,-327.1013 2455.4988,-334.9389 2458.8303,-328.7825"/>
 </g>
 <!-- Node6&#45;&gt;Node41 -->
 <g id="edge72" class="edge">
 <title>Node6&#45;&gt;Node41</title>
-<path fill="none" stroke="#191970" d="M2763.7838,-440.4813C2637.4739,-428.9741 2356.3582,-403.2147 2335.5,-400 2334.2205,-399.8028 2332.9296,-399.597 2331.6301,-399.3835"/>
-<polygon fill="#191970" stroke="#191970" points="2332.0042,-395.8957 2321.5495,-397.6132 2330.7934,-402.7902 2332.0042,-395.8957"/>
+<path fill="none" stroke="#191970" d="M2689.3468,-440.2419C2568.1929,-428.7435 2297.0578,-403.0107 2170.0712,-390.9588"/>
+<polygon fill="#191970" stroke="#191970" points="2170.3633,-387.4709 2160.0773,-390.0103 2169.7018,-394.4395 2170.3633,-387.4709"/>
 </g>
 <!-- Node7&#45;&gt;Node21 -->
 <g id="edge8" class="edge">
 <title>Node7&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M2073.922,-380.7542C1901.76,-369.3561 1384.277,-335.0959 1191.7277,-322.3481"/>
-<polygon fill="#191970" stroke="#191970" points="1191.9174,-318.8531 1181.708,-321.6847 1191.4549,-325.8378 1191.9174,-318.8531"/>
+<path fill="none" stroke="#191970" d="M1912.2899,-381.8525C1767.3236,-374.9774 1371.4784,-355.5162 1042.0178,-333 1003.1989,-330.347 959.9843,-326.883 924.4925,-323.9027"/>
+<polygon fill="#191970" stroke="#191970" points="924.4785,-320.3891 914.2195,-323.0355 923.8896,-327.3643 924.4785,-320.3891"/>
 </g>
 <!-- Node7&#45;&gt;Node16 -->
 <g id="edge35" class="edge">
 <title>Node7&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2073.9017,-380.6801C1939.4448,-370.6698 1591.0385,-339.2193 1309.5,-266 1272.1945,-256.298 1264.6569,-248.3607 1228.5,-235 1183.7991,-218.4821 1154.9639,-237.9454 1127.5,-199 1091.5558,-148.0289 1127.9344,-69.7065 1148.0633,-34.1759"/>
-<polygon fill="#191970" stroke="#191970" points="1151.2004,-35.7449 1153.2361,-25.3475 1145.1608,-32.2061 1151.2004,-35.7449"/>
+<path fill="none" stroke="#191970" d="M1912.3937,-377.1677C1802.0184,-362.1235 1550.9532,-324.2076 1346.0178,-266 1308.938,-255.4683 1302.03,-245.7668 1265.0178,-235 1187.9357,-212.577 1138.8115,-261.1646 1088.0178,-199 1065.0875,-170.9364 1027.3811,-199.7374 1118.0178,-67 1128.5862,-51.5226 1145.3797,-38.9398 1159.8689,-30.0867"/>
+<polygon fill="#191970" stroke="#191970" points="1161.6272,-33.113 1168.5163,-25.0638 1158.1113,-27.06 1161.6272,-33.113"/>
 </g>
 <!-- Node7&#45;&gt;Node28 -->
 <g id="edge30" class="edge">
 <title>Node7&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2187.2795,-370.6245C2190.3902,-370.0335 2193.4786,-369.4862 2196.5,-369 2375.3515,-340.219 2427.4779,-379.7276 2602.5,-333 2634.2004,-324.5366 2638.4115,-312.4905 2669.5,-302 2723.0748,-283.9217 2785.7522,-270.4815 2832.5978,-261.8726"/>
-<polygon fill="#191970" stroke="#191970" points="2833.4268,-265.2795 2842.6442,-260.0556 2832.1809,-258.3912 2833.4268,-265.2795"/>
+<path fill="none" stroke="#191970" d="M2025.7906,-370.5818C2028.903,-370.0018 2031.9937,-369.4686 2035.0178,-369 2241.1724,-337.0579 2296.5121,-362.5869 2503.0178,-333 2569.1131,-323.5303 2584.647,-315.5991 2650.0178,-302 2716.7745,-288.1126 2793.5748,-272.3924 2844.4585,-262.0129"/>
+<polygon fill="#191970" stroke="#191970" points="2845.1801,-265.4378 2854.2791,-260.0103 2843.7814,-258.579 2845.1801,-265.4378"/>
 </g>
 <!-- Node21&#45;&gt;Node22 -->
 <g id="edge9" class="edge">
 <title>Node21&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1181.8789,-304.3408C1186.4812,-303.5085 1191.0577,-302.718 1195.5,-302 1260.478,-291.4977 1434.801,-302.6123 1489.5,-266 1526.3789,-241.3154 1503.7086,-206.5489 1538.5,-179 1556.6206,-164.6515 1614.7522,-150.1718 1655.4546,-141.3916"/>
-<polygon fill="#191970" stroke="#191970" points="1656.335,-144.7828 1665.3922,-139.2856 1654.8838,-137.9349 1656.335,-144.7828"/>
+<path fill="none" stroke="#191970" d="M914.1485,-313.7268C995.9475,-307.9116 1142.5874,-294.3251 1265.0178,-266 1354.2992,-245.3442 1373.3676,-228.8257 1460.0178,-199 1485.8005,-190.1254 1491.7452,-186.2979 1518.0178,-179 1526.3991,-176.6719 1631.4703,-154.5227 1691.9152,-141.8283"/>
+<polygon fill="#191970" stroke="#191970" points="1692.8627,-145.2057 1701.9302,-139.7257 1691.4244,-138.3551 1692.8627,-145.2057"/>
 </g>
 <!-- Node23 -->
 <g id="node11" class="node">
 <title>Node23</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1839.5,-179.5 1839.5,-198.5 1897.5,-198.5 1897.5,-179.5 1839.5,-179.5"/>
-<text text-anchor="middle" x="1868.5" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">memory</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1130.0178,-179.5 1130.0178,-198.5 1188.0178,-198.5 1188.0178,-179.5 1130.0178,-179.5"/>
+<text text-anchor="middle" x="1159.0178" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">memory</text>
 </g>
 <!-- Node21&#45;&gt;Node23 -->
 <g id="edge10" class="edge">
 <title>Node21&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1181.7747,-313.9673C1307.3823,-306.5756 1584.0542,-288.3018 1676.5,-266 1684.6551,-264.0327 1783.2093,-223.9089 1835.8423,-202.3822"/>
-<polygon fill="#191970" stroke="#191970" points="1837.1739,-205.6191 1845.1035,-198.5925 1834.5228,-199.1405 1837.1739,-205.6191"/>
+<path fill="none" stroke="#191970" d="M868.9966,-302.4708C891.9604,-284.0037 933.5863,-252.9228 974.0178,-235 1021.8632,-213.7906 1081.1886,-201.2318 1119.5555,-194.7292"/>
+<polygon fill="#191970" stroke="#191970" points="1120.4908,-198.1227 1129.7942,-193.0535 1119.3602,-191.2146 1120.4908,-198.1227"/>
 </g>
 <!-- Node21&#45;&gt;Node18 -->
 <g id="edge11" class="edge">
 <title>Node21&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1085.9452,-302.4673C1065.0099,-292.5719 1037.3597,-279.0744 1013.5,-266 948.2592,-230.25 938.0422,-210.1845 870.5,-179 751.9574,-124.2684 713.0927,-134.5722 591.5,-87 545.7489,-69.1002 494.1701,-44.7841 463.0153,-29.623"/>
-<polygon fill="#191970" stroke="#191970" points="464.5522,-26.4785 454.0313,-25.229 461.4766,-32.7667 464.5522,-26.4785"/>
+<path fill="none" stroke="#191970" d="M787.9799,-302.6237C702.199,-279.8893 547.3472,-229.7433 442.0178,-143 418.7998,-123.879 413.8499,-115.4257 404.0178,-87 398.2405,-70.2973 397.3347,-50.1195 397.6733,-35.464"/>
+<polygon fill="#191970" stroke="#191970" points="401.188,-35.214 398.1423,-25.0664 394.1951,-34.8985 401.188,-35.214"/>
 </g>
 <!-- Node21&#45;&gt;Node20 -->
 <g id="edge12" class="edge">
 <title>Node21&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1055.4524,-303.7376C993.1656,-289.9788 906.2175,-270.2669 899.5,-266 876.543,-251.4179 858.5168,-225.1242 848.1865,-207.3681"/>
-<polygon fill="#191970" stroke="#191970" points="851.1782,-205.5464 843.2568,-198.5106 845.0617,-208.9506 851.1782,-205.5464"/>
+<path fill="none" stroke="#191970" d="M820.6192,-302.4946C806.7528,-293.8907 791.733,-281.6614 784.0178,-266 774.9549,-247.603 781.4458,-224.1324 788.2999,-207.9663"/>
+<polygon fill="#191970" stroke="#191970" points="791.6481,-209.0672 792.6967,-198.5244 785.3024,-206.1122 791.6481,-209.0672"/>
 </g>
 <!-- Node24 -->
 <g id="node14" class="node">
 <title>Node24</title>
 <g id="a_node14"><a xlink:href="runtime_2container_2base_8h.html" target="_top" xlink:title="Base utilities for common POD(plain old data) container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="1360,-179.5 1360,-198.5 1415,-198.5 1415,-179.5 1360,-179.5"/>
-<text text-anchor="middle" x="1387.5" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">./base.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1396.5178,-179.5 1396.5178,-198.5 1451.5178,-198.5 1451.5178,-179.5 1396.5178,-179.5"/>
+<text text-anchor="middle" x="1424.0178" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">./base.h</text>
 </a>
 </g>
 </g>
 <!-- Node21&#45;&gt;Node24 -->
 <g id="edge13" class="edge">
 <title>Node21&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1167.8192,-302.4857C1195.8527,-293.2772 1231.2824,-280.4894 1261.5,-266 1299.5876,-247.737 1340.842,-221.0861 1365.4094,-204.4098"/>
-<polygon fill="#191970" stroke="#191970" points="1367.3973,-207.2905 1373.6688,-198.7512 1363.441,-201.5158 1367.3973,-207.2905"/>
+<path fill="none" stroke="#191970" d="M893.5193,-302.4011C946.2761,-284.152 1039.2898,-253.5856 1121.0178,-235 1215.2835,-213.5632 1328.3694,-199.3841 1386.0775,-192.9587"/>
+<polygon fill="#191970" stroke="#191970" points="1386.6327,-196.4189 1396.1914,-191.8492 1385.8693,-189.4607 1386.6327,-196.4189"/>
 </g>
 <!-- Node24&#45;&gt;Node22 -->
 <g id="edge27" class="edge">
 <title>Node24&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1415.0104,-184.0304C1469.3323,-174.2174 1590.6851,-152.2956 1655.2322,-140.6355"/>
-<polygon fill="#191970" stroke="#191970" points="1656.2442,-144.0094 1665.4627,-138.7874 1654.9998,-137.1209 1656.2442,-144.0094"/>
+<path fill="none" stroke="#191970" d="M1451.9694,-182.0446C1456.6474,-180.9669 1461.4607,-179.9107 1466.0178,-179 1564.0351,-159.4111 1589.8125,-161.6234 1688.0178,-143 1689.2824,-142.7602 1690.5656,-142.512 1691.8602,-142.2574"/>
+<polygon fill="#191970" stroke="#191970" points="1692.7924,-145.6395 1701.8917,-140.2124 1691.3941,-138.7805 1692.7924,-145.6395"/>
 </g>
 <!-- Node24&#45;&gt;Node18 -->
 <g id="edge29" class="edge">
 <title>Node24&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1398.6891,-179.1634C1412.9724,-165.4348 1434.1758,-140.2743 1419.5,-123 1356.3519,-48.671 632.0515,-21.6703 467.5808,-16.4762"/>
-<polygon fill="#191970" stroke="#191970" points="467.2489,-12.9644 457.1452,-16.1524 467.0318,-19.961 467.2489,-12.9644"/>
+<path fill="none" stroke="#191970" d="M1435.9821,-179.4708C1451.6218,-165.8886 1475.319,-140.6553 1460.0178,-123 1390.9429,-43.2982 604.2449,-20.312 432.1481,-16.222"/>
+<polygon fill="#191970" stroke="#191970" points="432.0154,-12.718 421.9367,-15.9845 431.8526,-19.7161 432.0154,-12.718"/>
 </g>
 <!-- Node9 -->
 <g id="node15" class="node">
 <title>Node9</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1322,-123.5 1322,-142.5 1411,-142.5 1411,-123.5 1322,-123.5"/>
-<text text-anchor="middle" x="1366.5" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dmlc/logging.h</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1362.5178,-123.5 1362.5178,-142.5 1451.5178,-142.5 1451.5178,-123.5 1362.5178,-123.5"/>
+<text text-anchor="middle" x="1407.0178" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dmlc/logging.h</text>
 </g>
 <!-- Node24&#45;&gt;Node9 -->
 <g id="edge14" class="edge">
 <title>Node24&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M1383.8421,-179.2455C1381.0416,-171.7776 1377.091,-161.2427 1373.6676,-152.1137"/>
-<polygon fill="#191970" stroke="#191970" points="1376.9045,-150.777 1370.116,-142.6427 1370.3501,-153.2349 1376.9045,-150.777"/>
+<path fill="none" stroke="#191970" d="M1421.0566,-179.2455C1418.8139,-171.8579 1415.6601,-161.4689 1412.9097,-152.4087"/>
+<polygon fill="#191970" stroke="#191970" points="1416.199,-151.1948 1409.945,-142.6427 1409.5008,-153.2282 1416.199,-151.1948"/>
 </g>
 <!-- Node15 -->
 <g id="node16" class="node">
 <title>Node15</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2433,-6 2433,-25 2558,-25 2558,-6 2433,-6"/>
-<text text-anchor="middle" x="2495.5" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/logging.h</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2353.5178,-6 2353.5178,-25 2478.5178,-25 2478.5178,-6 2353.5178,-6"/>
+<text text-anchor="middle" x="2416.0178" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/logging.h</text>
 </g>
 <!-- Node24&#45;&gt;Node15 -->
 <g id="edge15" class="edge">
 <title>Node24&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1398.7373,-179.3538C1416.8787,-164.3948 1454.3538,-135.9762 1491.5,-123 1664.1299,-62.6958 2222.1676,-29.287 2422.8045,-18.9866"/>
-<polygon fill="#191970" stroke="#191970" points="2423.1329,-22.4745 2432.9419,-18.4702 2422.7767,-15.4836 2423.1329,-22.4745"/>
+<path fill="none" stroke="#191970" d="M1445.563,-179.4842C1500.2192,-155.8195 1649.2202,-94.2605 1780.0178,-67 1885.8576,-44.9411 2200.0377,-26.5809 2343.3249,-19.1031"/>
+<polygon fill="#191970" stroke="#191970" points="2343.5358,-22.5969 2353.341,-18.5836 2343.1731,-15.6063 2343.5358,-22.5969"/>
 </g>
 <!-- Node24&#45;&gt;Node25 -->
 <g id="edge16" class="edge">
 <title>Node24&#45;&gt;Node25</title>
-<path fill="none" stroke="#191970" d="M1415.0485,-187.0885C1445.3138,-185.0088 1495.3615,-181.6295 1538.5,-179 1815.5229,-162.1138 2144.9663,-144.5106 2289.8525,-136.8941"/>
-<polygon fill="#191970" stroke="#191970" points="2290.1595,-140.3829 2299.9621,-136.3631 2289.7922,-133.3925 2290.1595,-140.3829"/>
+<path fill="none" stroke="#191970" d="M1451.7882,-186.9069C1556.6431,-179.004 1930.6882,-150.8122 2092.0415,-138.651"/>
+<polygon fill="#191970" stroke="#191970" points="2092.6482,-142.1153 2102.3568,-137.8735 2092.122,-135.1351 2092.6482,-142.1153"/>
 </g>
 <!-- Node24&#45;&gt;Node10 -->
 <g id="edge26" class="edge">
 <title>Node24&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M1409.4662,-179.428C1430.552,-170.2057 1463.2726,-155.8057 1491.5,-143 1510.6943,-134.2922 1514.002,-127.9072 1534.5,-123 1657.0606,-93.6592 2537.3703,-80.9053 2793.665,-77.7884"/>
-<polygon fill="#191970" stroke="#191970" points="2794.0024,-81.2847 2803.9595,-77.6643 2793.918,-74.2852 2794.0024,-81.2847"/>
+<path fill="none" stroke="#191970" d="M1451.5439,-182.2775C1456.3605,-181.1495 1461.3296,-180.0163 1466.0178,-179 1547.4545,-161.3461 1570.771,-168.7596 1650.0178,-143 1670.0626,-136.4843 1672.5315,-127.9557 1693.0178,-123 1799.4582,-97.252 2556.2333,-82.2394 2792.0417,-78.1482"/>
+<polygon fill="#191970" stroke="#191970" points="2792.271,-81.6449 2802.2092,-77.973 2792.1503,-74.6459 2792.271,-81.6449"/>
 </g>
 <!-- Node27 -->
 <g id="node23" class="node">
 <title>Node27</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1544,-123.5 1544,-142.5 1627,-142.5 1627,-123.5 1544,-123.5"/>
-<text text-anchor="middle" x="1585.5" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">initializer_list</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1261.5178,-123.5 1261.5178,-142.5 1344.5178,-142.5 1344.5178,-123.5 1261.5178,-123.5"/>
+<text text-anchor="middle" x="1303.0178" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">initializer_list</text>
 </g>
 <!-- Node24&#45;&gt;Node27 -->
 <g id="edge28" class="edge">
 <title>Node24&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M1415.0311,-181.2134C1447.8055,-171.9439 1502.9565,-156.3456 1541.6298,-145.4077"/>
-<polygon fill="#191970" stroke="#191970" points="1542.9052,-148.6844 1551.5751,-142.5949 1541,-141.9486 1542.9052,-148.6844"/>
+<path fill="none" stroke="#191970" d="M1403.2172,-179.3733C1383.7935,-170.3838 1354.5954,-156.8706 1332.7213,-146.7471"/>
+<polygon fill="#191970" stroke="#191970" points="1334.0923,-143.5249 1323.547,-142.5011 1331.1522,-149.8776 1334.0923,-143.5249"/>
 </g>
 <!-- Node25&#45;&gt;Node18 -->
 <g id="edge25" class="edge">
 <title>Node25&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2299.9161,-127.8344C2150.8568,-116.0187 1771.2377,-86.5366 1453.5,-67 1065.6738,-43.1539 594.3121,-22.3615 467.3186,-16.8977"/>
-<polygon fill="#191970" stroke="#191970" points="467.2437,-13.3914 457.1028,-16.4593 466.9435,-20.3849 467.2437,-13.3914"/>
+<path fill="none" stroke="#191970" d="M2102.2639,-128.4202C1947.7459,-117.5176 1545.2511,-89.2709 1209.0178,-67 906.9429,-46.9916 541.3916,-24.2942 431.667,-17.5136"/>
+<polygon fill="#191970" stroke="#191970" points="431.7652,-14.0131 421.5684,-16.8899 431.3336,-20.9998 431.7652,-14.0131"/>
 </g>
 <!-- Node25&#45;&gt;Node10 -->
 <g id="edge17" class="edge">
 <title>Node25&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M2429.224,-125.7364C2523.3327,-115.1751 2697.2344,-95.6591 2793.9751,-84.8024"/>
-<polygon fill="#191970" stroke="#191970" points="2794.4018,-88.2766 2803.949,-83.6831 2793.621,-81.3202 2794.4018,-88.2766"/>
+<path fill="none" stroke="#191970" d="M2231.7776,-127.7819C2362.4904,-117.2497 2656.8829,-93.5289 2791.9206,-82.6481"/>
+<polygon fill="#191970" stroke="#191970" points="2792.4579,-86.1163 2802.1445,-81.8243 2791.8956,-79.1389 2792.4579,-86.1163"/>
 </g>
 <!-- Node25&#45;&gt;Node17 -->
 <g id="edge24" class="edge">
 <title>Node25&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M2421.2023,-123.4429C2503.5003,-109.7708 2660.9261,-84.366 2795.5,-67 2959.1901,-45.8768 3154.4929,-27.3387 3240.763,-19.4888"/>
-<polygon fill="#191970" stroke="#191970" points="3241.3353,-22.9514 3250.9785,-18.563 3240.7034,-15.98 3241.3353,-22.9514"/>
+<path fill="none" stroke="#191970" d="M2231.8729,-124.97C2412.4665,-102.6099 2917.9032,-40.0295 3071.3084,-21.0357"/>
+<polygon fill="#191970" stroke="#191970" points="3071.8229,-24.4988 3081.3171,-19.7965 3070.9627,-17.5518 3071.8229,-24.4988"/>
 </g>
 <!-- Node10&#45;&gt;Node18 -->
 <g id="edge22" class="edge">
 <title>Node10&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2803.7589,-75.4874C2458.7709,-66.7526 727.6749,-22.9229 467.3076,-16.3307"/>
-<polygon fill="#191970" stroke="#191970" points="467.0924,-12.8242 457.007,-16.0699 466.9152,-19.8219 467.0924,-12.8242"/>
+<path fill="none" stroke="#191970" d="M2802.3547,-75.5102C2454.3634,-66.8211 693.1849,-22.8452 431.5355,-16.312"/>
+<polygon fill="#191970" stroke="#191970" points="431.6114,-12.8129 421.5271,-16.062 431.4366,-19.8107 431.6114,-12.8129"/>
 </g>
 <!-- Node10&#45;&gt;Node15 -->
 <g id="edge19" class="edge">
 <title>Node10&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M2806.4038,-67.4581C2740.4998,-56.4443 2632.2468,-38.3531 2562.4384,-26.6867"/>
-<polygon fill="#191970" stroke="#191970" points="2562.8675,-23.21 2552.4274,-25.0137 2561.7137,-30.1142 2562.8675,-23.21"/>
+<path fill="none" stroke="#191970" d="M2802.331,-68.7697C2721.1616,-57.577 2576.0906,-37.5728 2488.6847,-25.5202"/>
+<polygon fill="#191970" stroke="#191970" points="2488.9657,-22.0259 2478.5813,-24.127 2488.0094,-28.9603 2488.9657,-22.0259"/>
 </g>
 <!-- Node10&#45;&gt;Node11 -->
 <g id="edge18" class="edge">
 <title>Node10&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M2923.2071,-72.0712C3057.9153,-60.951 3385.2284,-33.9311 3533.4074,-21.6989"/>
-<polygon fill="#191970" stroke="#191970" points="3534.0835,-25.1551 3543.7616,-20.8442 3533.5075,-18.1788 3534.0835,-25.1551"/>
+<path fill="none" stroke="#191970" d="M2921.6725,-70.0647C3018.6564,-58.7897 3210.3146,-36.508 3316.2494,-24.1924"/>
+<polygon fill="#191970" stroke="#191970" points="3316.6986,-27.6638 3326.2275,-23.0323 3315.8902,-20.7106 3316.6986,-27.6638"/>
 </g>
 <!-- Node10&#45;&gt;Node16 -->
 <g id="edge20" class="edge">
 <title>Node10&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2803.936,-71.2489C2785.8275,-69.6666 2765.8621,-68.0884 2747.5,-67 2122.3987,-29.9464 1357.1829,-18.1111 1191.9202,-15.9061"/>
-<polygon fill="#191970" stroke="#191970" points="1191.9552,-12.4063 1181.9101,-15.7746 1191.8632,-19.4057 1191.9552,-12.4063"/>
+<path fill="none" stroke="#191970" d="M2802.4861,-71.0596C2785.5467,-69.5392 2767.0676,-68.0414 2750.0178,-67 2135.8204,-29.4849 1383.979,-18.021 1220.3978,-15.8932"/>
+<polygon fill="#191970" stroke="#191970" points="1220.1064,-12.3893 1210.0625,-15.7611 1220.0169,-19.3887 1220.1064,-12.3893"/>
 </g>
 <!-- Node10&#45;&gt;Node17 -->
 <g id="edge21" class="edge">
 <title>Node10&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M2923.0673,-68.319C3008.5614,-55.8595 3164.1608,-33.1833 3240.6346,-22.0384"/>
-<polygon fill="#191970" stroke="#191970" points="3241.4941,-25.4502 3250.8848,-20.5446 3240.4845,-18.5234 3241.4941,-25.4502"/>
+<path fill="none" stroke="#191970" d="M2901.4266,-67.4581C2947.4785,-56.3077 3023.4898,-37.9034 3071.5953,-26.2558"/>
+<polygon fill="#191970" stroke="#191970" points="3072.5206,-29.633 3081.4161,-23.878 3070.8733,-22.8296 3072.5206,-29.633"/>
 </g>
 <!-- Node19 -->
 <g id="node22" class="node">
 <title>Node19</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2714.5,-6 2714.5,-25 2764.5,-25 2764.5,-6 2714.5,-6"/>
-<text text-anchor="middle" x="2739.5" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">atomic</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2709.0178,-6 2709.0178,-25 2759.0178,-25 2759.0178,-6 2709.0178,-6"/>
+<text text-anchor="middle" x="2734.0178" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">atomic</text>
 </g>
 <!-- Node10&#45;&gt;Node19 -->
 <g id="edge23" class="edge">
 <title>Node10&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M2844.125,-67.3906C2823.6505,-57.2359 2791.1558,-41.1196 2767.8115,-29.5416"/>
-<polygon fill="#191970" stroke="#191970" points="2769.2528,-26.3496 2758.7389,-25.0419 2766.1425,-32.6207 2769.2528,-26.3496"/>
+<path fill="none" stroke="#191970" d="M2842.0178,-67.3906C2820.7889,-57.1908 2787.0415,-40.9763 2762.9218,-29.3875"/>
+<polygon fill="#191970" stroke="#191970" points="2764.4067,-26.2179 2753.8774,-25.0419 2761.3752,-32.5275 2764.4067,-26.2179"/>
 </g>
 <!-- Node28&#45;&gt;Node15 -->
 <g id="edge32" class="edge">
 <title>Node28&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M2897.5503,-240.9802C2880.9962,-210.1701 2824.2496,-112.537 2747.5,-67 2717.6994,-49.3188 2631.4148,-34.1481 2568.0844,-24.9422"/>
-<polygon fill="#191970" stroke="#191970" points="2568.5247,-21.4697 2558.1295,-23.5165 2567.5322,-28.399 2568.5247,-21.4697"/>
+<path fill="none" stroke="#191970" d="M2867.2411,-240.8896C2844.8711,-232.9529 2816.3746,-219.5967 2798.0178,-199 2756.4834,-152.3976 2798.2618,-106.6157 2750.0178,-67 2711.1844,-35.1119 2574.7914,-22.8402 2488.7761,-18.2104"/>
+<polygon fill="#191970" stroke="#191970" points="2488.7131,-14.7026 2478.5461,-17.6828 2488.3526,-21.6933 2488.7131,-14.7026"/>
 </g>
 <!-- Node28&#45;&gt;Node11 -->
 <g id="edge31" class="edge">
 <title>Node28&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M2931.0504,-240.9967C3037.3629,-205.6094 3411.4297,-81.097 3553.3431,-33.8596"/>
-<polygon fill="#191970" stroke="#191970" points="3554.7701,-37.0735 3563.1528,-30.5943 3552.5592,-30.4318 3554.7701,-37.0735"/>
+<path fill="none" stroke="#191970" d="M2970.062,-243.4833C3050.5237,-232.6279 3184.8359,-205.9841 3282.0178,-143 3323.9158,-115.8457 3358.5824,-67.8774 3376.9119,-39.1527"/>
+<polygon fill="#191970" stroke="#191970" points="3379.9531,-40.8902 3382.2774,-30.5534 3374.0143,-37.1846 3379.9531,-40.8902"/>
 </g>
 <!-- Node28&#45;&gt;Node16 -->
 <g id="edge33" class="edge">
 <title>Node28&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2886.0035,-240.835C2830.1467,-208.6994 2642.103,-105.3062 2471.5,-67 2343.251,-38.2037 1381.0936,-19.4918 1191.7805,-16.068"/>
-<polygon fill="#191970" stroke="#191970" points="1191.7021,-12.5661 1181.6408,-15.8859 1191.5763,-19.565 1191.7021,-12.5661"/>
+<path fill="none" stroke="#191970" d="M2878.6852,-240.9076C2791.5895,-203.5142 2478.9846,-69.4585 2468.0178,-67 2343.0776,-38.9917 1407.1314,-19.6994 1220.3807,-16.1066"/>
+<polygon fill="#191970" stroke="#191970" points="1220.1896,-12.6024 1210.1245,-15.9106 1220.0558,-19.6011 1220.1896,-12.6024"/>
 </g>
 <!-- Node28&#45;&gt;Node17 -->
 <g id="edge34" class="edge">
 <title>Node28&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M2917.9884,-240.9967C2977.0534,-204.7557 3188.4659,-75.0379 3260.9256,-30.5783"/>
-<polygon fill="#191970" stroke="#191970" points="3263.1258,-33.3346 3269.8189,-25.1216 3259.4649,-27.3682 3263.1258,-33.3346"/>
+<path fill="none" stroke="#191970" d="M2909.7123,-240.9967C2942.3953,-205.2735 3058.1736,-78.725 3100.4323,-32.5354"/>
+<polygon fill="#191970" stroke="#191970" points="3103.0472,-34.8622 3107.2151,-25.1216 3097.8826,-30.1371 3103.0472,-34.8622"/>
 </g>
 <!-- Node29&#45;&gt;Node16 -->
 <g id="edge66" class="edge">
 <title>Node29&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2427.8521,-378.8123C2206.0067,-356.4874 1407.2011,-275.6058 1382.5,-266 1275.2725,-224.3011 1258.2667,-183.4204 1195.5,-87 1184.5178,-70.1295 1174.4385,-49.4606 1167.756,-34.6922"/>
-<polygon fill="#191970" stroke="#191970" points="1170.7695,-32.8521 1163.5261,-25.12 1164.3667,-35.6814 1170.7695,-32.8521"/>
+<path fill="none" stroke="#191970" d="M2670.777,-369.153C2680.5007,-340.46 2696.7335,-277.7324 2671.0178,-235 2588.8014,-98.3792 2507.8725,-108.8752 2354.0178,-67 2242.1065,-36.5407 1397.7123,-19.3707 1220.5585,-16.083"/>
+<polygon fill="#191970" stroke="#191970" points="1220.3842,-12.5793 1210.3215,-15.8948 1220.2555,-19.5781 1220.3842,-12.5793"/>
 </g>
 <!-- Node29&#45;&gt;Node28 -->
 <g id="edge37" class="edge">
 <title>Node29&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2541.2563,-374.6423C2587.6561,-365.867 2654.6419,-351.5532 2711.5,-333 2771.2343,-313.5083 2837.9799,-282.413 2874.6268,-264.4837"/>
-<polygon fill="#191970" stroke="#191970" points="2876.2812,-267.5704 2883.7038,-260.0102 2873.1867,-261.2915 2876.2812,-267.5704"/>
+<path fill="none" stroke="#191970" d="M2717.291,-369.4356C2743.6514,-360.7165 2775.5558,-348.4035 2802.0178,-333 2833.9943,-314.3864 2865.9712,-285.3933 2884.6038,-267.1986"/>
+<polygon fill="#191970" stroke="#191970" points="2887.1005,-269.6519 2891.7334,-260.1237 2882.1698,-264.6831 2887.1005,-269.6519"/>
 </g>
 <!-- Node29&#45;&gt;Node30 -->
 <g id="edge38" class="edge">
 <title>Node29&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M2494.9381,-369.2967C2501.8268,-359.2631 2510.8842,-346.0709 2518.134,-335.5113"/>
-<polygon fill="#191970" stroke="#191970" points="2521.1652,-337.28 2523.9399,-327.055 2515.3944,-333.318 2521.1652,-337.28"/>
+<path fill="none" stroke="#191970" d="M2612.5036,-369.4639C2570.8186,-357.5284 2513.2516,-341.0456 2474.2127,-329.8678"/>
+<polygon fill="#191970" stroke="#191970" points="2474.7925,-326.3932 2464.2154,-327.0053 2472.8656,-333.1228 2474.7925,-326.3932"/>
 </g>
 <!-- Node40 -->
 <g id="node32" class="node">
 <title>Node40</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2446,-241 2446,-260 2511,-260 2511,-241 2446,-241"/>
-<text text-anchor="middle" x="2478.5" y="-248" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">functional</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2596.5178,-241 2596.5178,-260 2661.5178,-260 2661.5178,-241 2596.5178,-241"/>
+<text text-anchor="middle" x="2629.0178" y="-248" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">functional</text>
 </g>
 <!-- Node29&#45;&gt;Node40 -->
 <g id="edge65" class="edge">
 <title>Node29&#45;&gt;Node40</title>
-<path fill="none" stroke="#191970" d="M2474.9113,-369.4025C2469.1936,-359.4813 2462.4838,-345.9794 2459.5,-333 2454.4823,-311.1735 2462.1691,-286.1017 2469.2143,-269.3807"/>
-<polygon fill="#191970" stroke="#191970" points="2472.4567,-270.7052 2473.3987,-260.1523 2466.0815,-267.8144 2472.4567,-270.7052"/>
+<path fill="none" stroke="#191970" d="M2660.9581,-369.389C2654.3964,-344.9649 2641.4666,-296.8371 2634.2323,-269.9095"/>
+<polygon fill="#191970" stroke="#191970" points="2637.5728,-268.8536 2631.598,-260.1042 2630.8125,-270.6698 2637.5728,-268.8536"/>
 </g>
 <!-- Node30&#45;&gt;Node18 -->
 <g id="edge63" class="edge">
 <title>Node30&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2467.7639,-312.778C2422.8679,-309.4918 2360.9836,-305.1517 2306.5,-302 1962.7028,-282.1125 1873.4659,-314.3143 1532.5,-266 1156.4805,-212.7187 1072.7097,-147.2055 701.5,-67 617.2795,-48.8029 517.4316,-30.3754 467.0498,-21.2958"/>
-<polygon fill="#191970" stroke="#191970" points="467.5345,-17.8269 457.0732,-19.5036 466.2968,-24.7166 467.5345,-17.8269"/>
+<path fill="none" stroke="#191970" d="M2368.3664,-313.9083C2188.3395,-303.5195 1680.1474,-273.6889 1645.0178,-266 1607.3627,-257.7584 1601.4014,-244.3962 1564.0178,-235 1441.4787,-204.2002 1406.3488,-221.5001 1282.0178,-199 1243.8287,-192.0889 1234.8962,-187.4498 1197.0178,-179 961.6115,-126.4863 902.4403,-114.7308 666.0178,-67 581.5579,-49.9486 481.8119,-31.0331 431.5084,-21.579"/>
+<polygon fill="#191970" stroke="#191970" points="432.0221,-18.1143 421.548,-19.7092 430.7306,-24.9942 432.0221,-18.1143"/>
 </g>
 <!-- Node30&#45;&gt;Node20 -->
 <g id="edge64" class="edge">
 <title>Node30&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M2532.8336,-307.8338C2536.4123,-290.3471 2540.8731,-253.556 2520.5,-235 2504.6805,-220.5914 1107.7238,-194.0041 872.0723,-189.6196"/>
-<polygon fill="#191970" stroke="#191970" points="872.1023,-186.1196 862.039,-189.4333 871.9723,-193.1184 872.1023,-186.1196"/>
+<path fill="none" stroke="#191970" d="M2368.2937,-314.2316C2231.0728,-306.9328 1893.9453,-288.1929 1612.0178,-266 1455.9805,-253.717 964.728,-205.4564 831.5849,-192.318"/>
+<polygon fill="#191970" stroke="#191970" points="831.865,-188.8287 821.5695,-191.3292 831.1772,-195.7948 831.865,-188.8287"/>
 </g>
 <!-- Node30&#45;&gt;Node10 -->
 <g id="edge60" class="edge">
 <title>Node30&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M2570.349,-307.926C2599.032,-299.7955 2637.7499,-286.1909 2667.5,-266 2738.5705,-217.7656 2732.9726,-179.6903 2797.5,-123 2810.3338,-111.7249 2825.9703,-100.7232 2838.8017,-92.3075"/>
-<polygon fill="#191970" stroke="#191970" points="2841.051,-95.0223 2847.569,-86.6696 2837.2649,-89.1345 2841.051,-95.0223"/>
+<path fill="none" stroke="#191970" d="M2439.9326,-307.758C2454.8714,-291.6921 2486.2568,-259.0291 2516.0178,-235 2585.8387,-178.6264 2602.0953,-159.6284 2684.0178,-123 2719.4223,-107.1703 2761.3177,-96.0044 2795.5666,-88.6413"/>
+<polygon fill="#191970" stroke="#191970" points="2796.3994,-92.043 2805.4718,-86.5711 2794.9673,-85.191 2796.3994,-92.043"/>
 </g>
 <!-- Node30&#45;&gt;Node11 -->
 <g id="edge39" class="edge">
 <title>Node30&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M2563.9755,-307.9751C2618.505,-292.4892 2729.9264,-260.9697 2824.5,-235 3069.0178,-167.8559 3131.8114,-157.094 3375.5,-87 3435.8628,-69.6374 3504.5634,-48.3701 3551.637,-33.5641"/>
-<polygon fill="#191970" stroke="#191970" points="3552.8567,-36.8495 3561.3429,-30.5065 3550.7533,-30.1729 3552.8567,-36.8495"/>
+<path fill="none" stroke="#191970" d="M2454.7684,-307.9863C2477.8505,-298.4163 2513.6245,-282.7169 2543.0178,-266 2564.1287,-253.9935 2565.3441,-243.7023 2588.0178,-235 2703.5094,-190.6739 2743.6373,-227.4893 2864.0178,-199 3047.7615,-155.5151 3257.184,-71.8836 3346.6858,-34.4388"/>
+<polygon fill="#191970" stroke="#191970" points="3348.1199,-37.6327 3355.9847,-30.5337 3345.4095,-31.1787 3348.1199,-37.6327"/>
 </g>
 <!-- Node30&#45;&gt;Node19 -->
 <g id="edge61" class="edge">
 <title>Node30&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M2537.2171,-307.794C2567.0668,-264.6619 2687.9396,-90.0036 2727.1522,-33.3422"/>
-<polygon fill="#191970" stroke="#191970" points="2730.0994,-35.234 2732.9121,-25.0193 2724.3434,-31.2505 2730.0994,-35.234"/>
+<path fill="none" stroke="#191970" d="M2449.631,-307.8379C2466.9551,-298.3907 2493.0332,-282.9815 2513.0178,-266 2602.9646,-189.5697 2689.5758,-76.338 2721.3046,-33.1276"/>
+<polygon fill="#191970" stroke="#191970" points="2724.161,-35.1508 2727.2236,-25.0083 2718.5044,-31.0272 2724.161,-35.1508"/>
 </g>
 <!-- Node30&#45;&gt;Node28 -->
 <g id="edge59" class="edge">
 <title>Node30&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2576.8804,-307.9623C2586.965,-305.9448 2597.5828,-303.8635 2607.5,-302 2685.5067,-287.3422 2775.3884,-271.8513 2835.239,-261.734"/>
-<polygon fill="#191970" stroke="#191970" points="2836.0386,-265.1486 2845.3167,-260.0332 2834.8736,-258.2462 2836.0386,-265.1486"/>
+<path fill="none" stroke="#191970" d="M2493.9164,-308.5336C2579.3863,-296.3496 2732.0902,-274.5812 2824.2226,-261.4474"/>
+<polygon fill="#191970" stroke="#191970" points="2824.916,-264.884 2834.3219,-260.0077 2823.928,-257.9541 2824.916,-264.884"/>
 </g>
 <!-- Node30&#45;&gt;Node31 -->
 <g id="edge40" class="edge">
 <title>Node30&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M2467.7709,-312.6536C2422.8781,-309.3089 2360.9949,-304.9498 2306.5,-302 1897.4529,-279.8583 1794.5636,-287.8341 1385.5,-266 1333.4488,-263.2217 1274.8,-259.2357 1229.8866,-256.0016"/>
-<polygon fill="#191970" stroke="#191970" points="1229.8773,-252.4918 1219.6507,-255.2604 1229.3718,-259.4736 1229.8773,-252.4918"/>
+<path fill="none" stroke="#191970" d="M2368.3951,-314.1201C2307.7098,-310.8789 2213.0652,-305.914 2131.0178,-302 1765.3061,-284.5542 1672.7013,-298.4858 1308.0178,-266 1294.4542,-264.7918 1280.0359,-263.141 1266.1597,-261.3656"/>
+<polygon fill="#191970" stroke="#191970" points="1266.5419,-257.8858 1256.1716,-260.0553 1265.6314,-264.8263 1266.5419,-257.8858"/>
 </g>
 <!-- Node30&#45;&gt;Node33 -->
 <g id="edge43" class="edge">
 <title>Node30&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M2467.9429,-312.9737C2302.5438,-301.0064 1854.2954,-268.5737 1677.557,-255.786"/>
-<polygon fill="#191970" stroke="#191970" points="1677.8017,-252.2946 1667.5751,-255.0638 1677.2964,-259.2764 1677.8017,-252.2946"/>
+<path fill="none" stroke="#191970" d="M2368.2808,-311.6129C2236.4004,-299.2376 1931.0271,-270.5821 1790.3684,-257.383"/>
+<polygon fill="#191970" stroke="#191970" points="1790.4065,-253.8713 1780.1232,-256.4217 1789.7525,-260.8407 1790.4065,-253.8713"/>
 </g>
 <!-- Node30&#45;&gt;Node40 -->
 <g id="edge62" class="edge">
 <title>Node30&#45;&gt;Node40</title>
-<path fill="none" stroke="#191970" d="M2523.0497,-307.9005C2515.0179,-297.552 2502.0585,-280.8542 2492.1826,-268.1295"/>
-<polygon fill="#191970" stroke="#191970" points="2494.8328,-265.8356 2485.9365,-260.0817 2489.3028,-270.1275 2494.8328,-265.8356"/>
+<path fill="none" stroke="#191970" d="M2459.3864,-307.9005C2493.9385,-296.2086 2552.4415,-276.4122 2590.9025,-263.3976"/>
+<polygon fill="#191970" stroke="#191970" points="2592.3513,-266.6024 2600.7018,-260.0817 2590.1076,-259.9717 2592.3513,-266.6024"/>
 </g>
 <!-- Node31&#45;&gt;Node18 -->
 <g id="edge41" class="edge">
 <title>Node31&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1123.7926,-235.461C1067.5637,-209.9878 948.5703,-157.7402 844.5,-123 706.4045,-76.9017 536.8796,-37.8498 466.9834,-22.4857"/>
-<polygon fill="#191970" stroke="#191970" points="467.5412,-19.0251 457.0245,-20.3091 466.0465,-25.8637 467.5412,-19.0251"/>
+<path fill="none" stroke="#191970" d="M1149.6343,-235.499C1121.1779,-225.5064 1083.2269,-211.8896 1050.0178,-199 1028.4726,-190.6376 1023.8835,-186.4843 1002.0178,-179 788.302,-105.8485 523.2506,-43.5138 431.8171,-22.8068"/>
+<polygon fill="#191970" stroke="#191970" points="432.312,-19.3306 421.787,-20.5448 430.772,-26.1591 432.312,-19.3306"/>
 </g>
 <!-- Node31&#45;&gt;Node24 -->
 <g id="edge42" class="edge">
 <title>Node31&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1213.009,-235.4554C1255.6184,-224.1113 1312.8698,-208.8691 1349.9761,-198.9901"/>
-<polygon fill="#191970" stroke="#191970" points="1351.116,-202.3087 1359.8789,-196.3537 1349.315,-195.5443 1351.116,-202.3087"/>
+<path fill="none" stroke="#191970" d="M1249.5267,-235.4554C1292.1362,-224.1113 1349.3876,-208.8691 1386.4939,-198.9901"/>
+<polygon fill="#191970" stroke="#191970" points="1387.6337,-202.3087 1396.3967,-196.3537 1385.8328,-195.5443 1387.6337,-202.3087"/>
 </g>
 <!-- Node33&#45;&gt;Node22 -->
 <g id="edge49" class="edge">
 <title>Node33&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1635.3316,-235.4852C1650.3368,-226.7445 1667.4718,-214.3971 1678.5,-199 1688.2518,-185.3849 1693.0223,-166.8428 1695.342,-152.8783"/>
-<polygon fill="#191970" stroke="#191970" points="1698.827,-153.2263 1696.7252,-142.842 1691.8925,-152.2705 1698.827,-153.2263"/>
+<path fill="none" stroke="#191970" d="M1719.2402,-235.1389C1722.3031,-213.9692 1727.8229,-175.8174 1731.1804,-152.6112"/>
+<polygon fill="#191970" stroke="#191970" points="1734.6667,-152.9576 1732.6347,-142.5595 1727.7388,-151.9552 1734.6667,-152.9576"/>
 </g>
 <!-- Node33&#45;&gt;Node23 -->
 <g id="edge53" class="edge">
 <title>Node33&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1667.7346,-235.7692C1718.0443,-224.0493 1786.844,-208.0221 1829.4377,-198.0997"/>
-<polygon fill="#191970" stroke="#191970" points="1830.4689,-201.4533 1839.4141,-195.7757 1828.8807,-194.6358 1830.4689,-201.4533"/>
+<path fill="none" stroke="#191970" d="M1653.7135,-243.9763C1562.3224,-234.4875 1388.1255,-216.1368 1240.0178,-199 1226.4681,-197.4322 1211.655,-195.6295 1198.4447,-193.9902"/>
+<polygon fill="#191970" stroke="#191970" points="1198.6051,-190.4832 1188.2487,-192.7182 1197.7385,-197.4293 1198.6051,-190.4832"/>
 </g>
 <!-- Node33&#45;&gt;Node18 -->
 <g id="edge56" class="edge">
 <title>Node33&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1541.418,-243.9871C1482.6831,-236.7943 1393.2115,-222.9351 1318.5,-199 1187.8624,-157.148 1171.9461,-102.7184 1039.5,-67 929.9205,-37.4484 576.6439,-21.174 467.1604,-16.7489"/>
-<polygon fill="#191970" stroke="#191970" points="467.1835,-13.2471 457.0521,-16.3462 466.9048,-20.2416 467.1835,-13.2471"/>
+<path fill="none" stroke="#191970" d="M1653.6392,-237.3387C1649.0369,-236.5069 1644.4602,-235.7171 1640.0178,-235 1509.6219,-213.9516 1472.6533,-232.9898 1345.0178,-199 1199.1266,-160.1486 1178.6614,-102.9074 1032.0178,-67 916.2086,-38.6428 544.2916,-21.4425 431.6508,-16.787"/>
+<polygon fill="#191970" stroke="#191970" points="431.7113,-13.2867 421.5768,-16.3757 431.4257,-20.2808 431.7113,-13.2867"/>
 </g>
 <!-- Node33&#45;&gt;Node20 -->
 <g id="edge58" class="edge">
 <title>Node33&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1541.4895,-245.4411C1386.534,-233.0001 990.9128,-201.2368 872.4908,-191.729"/>
-<polygon fill="#191970" stroke="#191970" points="872.4742,-188.2165 862.2262,-190.9049 871.914,-195.1941 872.4742,-188.2165"/>
+<path fill="none" stroke="#191970" d="M1653.8512,-246.2729C1474.7952,-234.2903 968.0766,-200.3804 832.0059,-191.2745"/>
+<polygon fill="#191970" stroke="#191970" points="831.9918,-187.7658 821.7804,-190.5902 831.5244,-194.7502 831.9918,-187.7658"/>
 </g>
 <!-- Node33&#45;&gt;Node24 -->
 <g id="edge45" class="edge">
 <title>Node33&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1551.4158,-235.4554C1512.245,-224.354 1459.9019,-209.5194 1425.0124,-199.6314"/>
-<polygon fill="#191970" stroke="#191970" points="1425.7954,-196.2155 1415.22,-196.8561 1423.8867,-202.9503 1425.7954,-196.2155"/>
+<path fill="none" stroke="#191970" d="M1653.8019,-237.2311C1595.3415,-224.9604 1510.3861,-207.1285 1461.8307,-196.9368"/>
+<polygon fill="#191970" stroke="#191970" points="1462.261,-193.451 1451.7552,-194.822 1460.823,-200.3017 1462.261,-193.451"/>
 </g>
 <!-- Node33&#45;&gt;Node9 -->
 <g id="edge44" class="edge">
 <title>Node33&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M1541.2331,-241.8899C1470.0063,-231.5521 1362.5684,-213.7609 1350.5,-199 1339.5857,-185.6506 1346.3638,-166.0135 1354.1814,-151.6296"/>
-<polygon fill="#191970" stroke="#191970" points="1357.4219,-153.0306 1359.5551,-142.6527 1351.4158,-149.4353 1357.4219,-153.0306"/>
+<path fill="none" stroke="#191970" d="M1653.6225,-237.4393C1649.024,-236.5839 1644.4529,-235.7613 1640.0178,-235 1584.0478,-225.3927 1424.2376,-241.8908 1387.0178,-199 1375.431,-185.6478 1383.8639,-165.7415 1393.0771,-151.2836"/>
+<polygon fill="#191970" stroke="#191970" points="1396.2133,-152.9035 1399.0416,-142.6932 1390.4633,-148.9112 1396.2133,-152.9035"/>
 </g>
 <!-- Node33&#45;&gt;Node15 -->
 <g id="edge46" class="edge">
 <title>Node33&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1630.1295,-235.477C1658.479,-219.4343 1705.9247,-194.2641 1749.5,-179 1988.8379,-95.1615 2286.9669,-45.3327 2422.4352,-25.4937"/>
-<polygon fill="#191970" stroke="#191970" points="2423.2949,-28.9056 2432.6878,-24.0041 2422.2885,-21.9783 2423.2949,-28.9056"/>
+<path fill="none" stroke="#191970" d="M1728.4771,-235.2867C1741.584,-218.9566 1764.547,-193.4233 1790.0178,-179 1972.5531,-75.6363 2221.9037,-36.0707 2343.4493,-22.2286"/>
+<polygon fill="#191970" stroke="#191970" points="2343.8416,-25.7066 2353.3928,-21.1213 2343.0669,-18.7496 2343.8416,-25.7066"/>
 </g>
 <!-- Node33&#45;&gt;Node25 -->
 <g id="edge47" class="edge">
 <title>Node33&#45;&gt;Node25</title>
-<path fill="none" stroke="#191970" d="M1648.8276,-235.4454C1677.6527,-225.4865 1715.9754,-211.9272 1749.5,-199 1771.0635,-190.6851 1775.0366,-184.4332 1797.5,-179 1992.5158,-131.8319 2047.5442,-159.5432 2247.5,-143 2261.1514,-141.8706 2275.6967,-140.6451 2289.7262,-139.4519"/>
-<polygon fill="#191970" stroke="#191970" points="2290.164,-142.9273 2299.8306,-138.5905 2289.5694,-135.9526 2290.164,-142.9273"/>
+<path fill="none" stroke="#191970" d="M1736.0332,-235.4299C1758.1956,-218.721 1796.5777,-192.3666 1834.0178,-179 1880.1195,-162.5411 2008.7916,-147.9004 2092.0237,-139.7624"/>
+<polygon fill="#191970" stroke="#191970" points="2092.5755,-143.2255 2102.1919,-138.7784 2091.9012,-136.258 2092.5755,-143.2255"/>
 </g>
 <!-- Node33&#45;&gt;Node10 -->
 <g id="edge48" class="edge">
 <title>Node33&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M1667.5534,-235.4334C1703.2069,-226.2511 1748.3497,-213.5061 1787.5,-199 1807.2641,-191.677 1810.0638,-184.1583 1830.5,-179 2092.9636,-112.7511 2169.3412,-171.8022 2438.5,-143 2568.7506,-129.0621 2720.0701,-103.2325 2802.4145,-88.355"/>
-<polygon fill="#191970" stroke="#191970" points="2803.1369,-91.7812 2812.3515,-86.5522 2801.8873,-84.8936 2803.1369,-91.7812"/>
+<path fill="none" stroke="#191970" d="M1750.6966,-235.2922C1772.7866,-225.2147 1802.2453,-211.581 1828.0178,-199 1845.5231,-190.4547 1848.3234,-184.4751 1867.0178,-179 2027.2766,-132.0644 2074.877,-159.8244 2241.0178,-143 2441.0097,-122.7476 2677.0443,-97.2121 2792.0155,-84.6686"/>
+<polygon fill="#191970" stroke="#191970" points="2792.5291,-88.1335 2802.0903,-83.5689 2791.7695,-81.1748 2792.5291,-88.1335"/>
 </g>
 <!-- Node33&#45;&gt;Node16 -->
 <g id="edge54" class="edge">
 <title>Node33&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1541.3879,-244.3437C1473.9855,-236.704 1372.6489,-221.8263 1341.5,-199 1287.5974,-159.4995 1318.1834,-112.7789 1269.5,-67 1247.3216,-46.1448 1215.1727,-32.4329 1191.3532,-24.4403"/>
-<polygon fill="#191970" stroke="#191970" points="1192.299,-21.0682 1181.7083,-21.3609 1190.17,-27.7366 1192.299,-21.0682"/>
+<path fill="none" stroke="#191970" d="M1663.383,-235.419C1636.0499,-226.6607 1602.8068,-214.3177 1575.0178,-199 1526.7471,-172.3924 1525.1791,-149.805 1477.0178,-123 1389.1309,-74.0851 1274.5745,-39.0641 1219.8394,-23.8917"/>
+<polygon fill="#191970" stroke="#191970" points="1220.5943,-20.4697 1210.0252,-21.2071 1218.7473,-27.2216 1220.5943,-20.4697"/>
 </g>
 <!-- Node33&#45;&gt;Node17 -->
 <g id="edge57" class="edge">
 <title>Node33&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M1667.5815,-245.5282C1748.9859,-238.4522 1894.7443,-223.5407 2017.5,-199 2141.4234,-174.2259 2167.9827,-149.7251 2291.5,-123 2652.3079,-44.933 3099.659,-22.3832 3240.7345,-16.9867"/>
-<polygon fill="#191970" stroke="#191970" points="3240.8817,-20.4838 3250.7442,-16.6132 3240.6206,-13.4887 3240.8817,-20.4838"/>
+<path fill="none" stroke="#191970" d="M1780.1602,-244.9777C1834.2851,-238.6361 1913.4976,-225.4759 1978.0178,-199 2034.6955,-175.7423 2035.35,-143.6812 2093.0178,-123 2277.6336,-56.792 2898.5614,-25.0587 3071.1677,-17.382"/>
+<polygon fill="#191970" stroke="#191970" points="3071.5752,-20.8675 3081.4117,-16.9313 3071.2675,-13.8743 3071.5752,-20.8675"/>
 </g>
 <!-- Node33&#45;&gt;Node27 -->
 <g id="edge52" class="edge">
 <title>Node33&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M1574.5434,-235.3925C1560.8812,-226.7636 1546.0865,-214.5426 1538.5,-199 1529.4624,-180.4845 1546.1518,-161.5511 1562.0875,-148.7389"/>
-<polygon fill="#191970" stroke="#191970" points="1564.3216,-151.4382 1570.2119,-142.6317 1560.1154,-145.8428 1564.3216,-151.4382"/>
+<path fill="none" stroke="#191970" d="M1653.6266,-237.4151C1649.0272,-236.5653 1644.4547,-235.7507 1640.0178,-235 1524.1261,-215.393 1484.9002,-247.9024 1378.0178,-199 1353.5315,-187.7967 1331.1612,-165.9086 1317.3459,-150.4154"/>
+<polygon fill="#191970" stroke="#191970" points="1319.7701,-147.8669 1310.5918,-142.5745 1314.4665,-152.4354 1319.7701,-147.8669"/>
 </g>
 <!-- Node34 -->
 <g id="node29" class="node">
 <title>Node34</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1548,-179.5 1548,-198.5 1601,-198.5 1601,-179.5 1548,-179.5"/>
-<text text-anchor="middle" x="1574.5" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstddef</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1584.5178,-179.5 1584.5178,-198.5 1637.5178,-198.5 1637.5178,-179.5 1584.5178,-179.5"/>
+<text text-anchor="middle" x="1611.0178" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstddef</text>
 </g>
 <!-- Node33&#45;&gt;Node34 -->
 <g id="edge50" class="edge">
 <title>Node33&#45;&gt;Node34</title>
-<path fill="none" stroke="#191970" d="M1597.0843,-235.2977C1593.0501,-227.0277 1588.0227,-216.7215 1583.736,-207.9339"/>
-<polygon fill="#191970" stroke="#191970" points="1586.8334,-206.3001 1579.3034,-198.8469 1580.542,-209.3691 1586.8334,-206.3001"/>
+<path fill="none" stroke="#191970" d="M1691.0873,-235.4554C1674.4869,-225.8241 1653.0434,-213.3828 1636.5261,-203.7996"/>
+<polygon fill="#191970" stroke="#191970" points="1637.8501,-200.5214 1627.444,-198.5303 1634.3372,-206.5761 1637.8501,-200.5214"/>
 </g>
 <!-- Node35 -->
 <g id="node30" class="node">
 <title>Node35</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1619.5,-179.5 1619.5,-198.5 1669.5,-198.5 1669.5,-179.5 1619.5,-179.5"/>
-<text text-anchor="middle" x="1644.5" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstring</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1656.0178,-179.5 1656.0178,-198.5 1706.0178,-198.5 1706.0178,-179.5 1656.0178,-179.5"/>
+<text text-anchor="middle" x="1681.0178" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstring</text>
 </g>
 <!-- Node33&#45;&gt;Node35 -->
 <g id="edge51" class="edge">
 <title>Node33&#45;&gt;Node35</title>
-<path fill="none" stroke="#191970" d="M1614.3876,-235.2977C1619.8822,-226.8498 1626.7586,-216.2773 1632.5527,-207.369"/>
-<polygon fill="#191970" stroke="#191970" points="1635.5772,-209.1381 1638.0955,-198.8469 1629.7092,-205.3215 1635.5772,-209.1381"/>
+<path fill="none" stroke="#191970" d="M1708.1189,-235.2977C1703.2259,-226.9388 1697.1152,-216.4997 1691.9354,-207.6509"/>
+<polygon fill="#191970" stroke="#191970" points="1694.8542,-205.7089 1686.7818,-198.8469 1688.8131,-209.2452 1694.8542,-205.7089"/>
 </g>
 <!-- Node33&#45;&gt;Node36 -->
 <g id="edge55" class="edge">
 <title>Node33&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M1667.572,-239.8916C1726.4771,-229.9505 1817.1357,-214.5653 1905.7935,-199.1134"/>
-<polygon fill="#191970" stroke="#191970" points="1906.5043,-202.5423 1915.7539,-197.3756 1905.3011,-195.6465 1906.5043,-202.5423"/>
+<path fill="none" stroke="#191970" d="M1767.4111,-235.4554C1802.3857,-225.0139 1848.4224,-211.27 1881.3305,-201.4455"/>
+<polygon fill="#191970" stroke="#191970" points="1882.5143,-204.7448 1891.0951,-198.5303 1880.5117,-198.0373 1882.5143,-204.7448"/>
 </g>
 <!-- Node41&#45;&gt;Node21 -->
 <g id="edge74" class="edge">
 <title>Node41&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M2205.3871,-370.5314C2202.0527,-369.9554 2198.7398,-369.4376 2195.5,-369 1838.3878,-320.7626 1745.3831,-351.4426 1385.5,-333 1319.9366,-329.6401 1245.3508,-325.2636 1191.7075,-322.0182"/>
-<polygon fill="#191970" stroke="#191970" points="1191.7983,-318.5174 1181.6047,-321.4054 1191.3745,-325.5045 1191.7983,-318.5174"/>
+<path fill="none" stroke="#191970" d="M2043.9091,-370.4998C2040.5736,-369.9318 2037.2594,-369.4244 2034.0178,-369 1596.5725,-311.7236 1482.439,-358.849 1042.0178,-333 1003.1752,-330.7203 959.9585,-327.2884 924.4716,-324.233"/>
+<polygon fill="#191970" stroke="#191970" points="924.4656,-320.7194 914.2001,-323.3407 923.8597,-327.6931 924.4656,-320.7194"/>
 </g>
 <!-- Node41&#45;&gt;Node23 -->
 <g id="edge97" class="edge">
 <title>Node41&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M2205.2853,-370.447C2151.335,-357.2587 2077.5019,-338.7281 2064.5,-333 2014.5244,-310.9827 2005.4934,-298.3558 1961.5,-266 1934.7021,-246.2909 1905.3903,-221.4277 1886.9793,-205.3743"/>
-<polygon fill="#191970" stroke="#191970" points="1889.1609,-202.6322 1879.3355,-198.6685 1884.5446,-207.8943 1889.1609,-202.6322"/>
+<path fill="none" stroke="#191970" d="M2048.324,-369.4484C1982.9735,-351.5267 1869.0699,-321.5661 1770.0178,-302 1662.4774,-280.7572 1633.9581,-285.1071 1526.0178,-266 1507.9183,-262.7961 1219.9707,-202.9432 1202.0178,-199 1200.7545,-198.7225 1199.471,-198.4382 1198.1752,-198.1491"/>
+<polygon fill="#191970" stroke="#191970" points="1198.6664,-194.6717 1188.1401,-195.8747 1197.1191,-201.4986 1198.6664,-194.6717"/>
 </g>
 <!-- Node41&#45;&gt;Node18 -->
 <g id="edge101" class="edge">
 <title>Node41&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2205.3596,-370.7179C2202.0322,-370.0943 2198.7284,-369.5153 2195.5,-369 2073.0759,-349.4594 1203.9381,-299.2272 1084.5,-266 998.8964,-242.1854 986.2094,-213.9096 904.5,-179 839.0612,-151.0418 821.5708,-146.7783 754.5,-123 699.4821,-103.4948 536.328,-49.2541 466.6404,-26.1462"/>
-<polygon fill="#191970" stroke="#191970" points="467.6542,-22.795 457.0608,-22.9705 465.4516,-29.4394 467.6542,-22.795"/>
+<path fill="none" stroke="#191970" d="M2043.9077,-370.5106C2040.5725,-369.9399 2037.2588,-369.429 2034.0178,-369 1830.9656,-342.125 1308.0105,-395.6921 1113.0178,-333 1089.1953,-325.3408 1087.5858,-314.6883 1066.0178,-302 964.0708,-242.0256 938.9844,-225.0045 830.0178,-179 709.731,-128.2163 672.3147,-135.3214 551.0178,-87 506.5255,-69.2755 456.5644,-44.8671 426.4612,-29.6447"/>
+<polygon fill="#191970" stroke="#191970" points="427.9174,-26.4587 417.4173,-25.0452 424.7442,-32.6981 427.9174,-26.4587"/>
 </g>
 <!-- Node41&#45;&gt;Node20 -->
 <g id="edge102" class="edge">
 <title>Node41&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M2205.3634,-370.6937C2202.0351,-370.0763 2198.73,-369.5052 2195.5,-369 2129.8295,-358.7284 1065.0972,-290.9781 1003.5,-266 983.3016,-257.8094 984.4591,-245.7523 965.5,-235 935.7871,-218.1489 898.7698,-205.5813 872.3804,-197.8784"/>
-<polygon fill="#191970" stroke="#191970" points="873.0507,-194.4305 862.4751,-195.0693 871.1408,-201.1649 873.0507,-194.4305"/>
+<path fill="none" stroke="#191970" d="M2043.9084,-370.5054C2040.5731,-369.9359 2037.2591,-369.4268 2034.0178,-369 1823.6823,-341.3055 1285.3127,-386.4977 1080.0178,-333 1079.5901,-332.8886 897.5691,-239.8766 826.2266,-203.4168"/>
+<polygon fill="#191970" stroke="#191970" points="827.4222,-200.0973 816.9249,-198.6631 824.2366,-206.3305 827.4222,-200.0973"/>
 </g>
 <!-- Node41&#45;&gt;Node15 -->
 <g id="edge82" class="edge">
 <title>Node41&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M2281.0086,-369.4714C2330.0486,-327.3011 2465.5783,-210.2021 2471.5,-199 2499.8296,-145.4088 2499.4374,-71.1396 2497.2716,-35.7808"/>
-<polygon fill="#191970" stroke="#191970" points="2500.7301,-35.0846 2496.5081,-25.3673 2493.7488,-35.5965 2500.7301,-35.0846"/>
+<path fill="none" stroke="#191970" d="M2115.4533,-369.386C2149.0021,-331.5736 2238.9995,-229.6924 2312.0178,-143 2344.6326,-104.2775 2381.9357,-58.0599 2401.8029,-33.2853"/>
+<polygon fill="#191970" stroke="#191970" points="2404.7736,-35.175 2408.2925,-25.1816 2399.3097,-30.7994 2404.7736,-35.175"/>
 </g>
 <!-- Node41&#45;&gt;Node10 -->
 <g id="edge94" class="edge">
 <title>Node41&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M2321.5343,-378.8728C2404.5143,-370.2866 2552.5599,-352.8089 2602.5,-333 2725.2915,-284.2943 2820.9348,-145.5797 2852.4132,-95.4006"/>
-<polygon fill="#191970" stroke="#191970" points="2855.5717,-96.9469 2857.8424,-86.5982 2849.6138,-93.2721 2855.5717,-96.9469"/>
+<path fill="none" stroke="#191970" d="M2160.0739,-381.0447C2262.3004,-374.3987 2470.0411,-358.2216 2538.0178,-333 2561.1317,-324.424 2562.2961,-313.6607 2584.0178,-302 2620.8874,-282.2075 2636.9227,-290.2618 2671.0178,-266 2740.2746,-216.7173 2734.8979,-179.9312 2798.0178,-123 2810.5276,-111.7167 2825.8074,-100.647 2838.2995,-92.195"/>
+<polygon fill="#191970" stroke="#191970" points="2840.4269,-94.9838 2846.8234,-86.5378 2836.556,-89.1514 2840.4269,-94.9838"/>
 </g>
 <!-- Node41&#45;&gt;Node11 -->
 <g id="edge73" class="edge">
 <title>Node41&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M2321.6435,-378.6158C2350.981,-375.674 2387.1233,-372.0913 2419.5,-369 2594.1039,-352.3289 2640.6502,-368.1013 2812.5,-333 2901.6142,-314.7979 3343.5688,-156.689 3374.5,-143 3448.5135,-110.2443 3530.6495,-62.7134 3575.2408,-35.8992"/>
-<polygon fill="#191970" stroke="#191970" points="3577.1576,-38.8303 3583.9074,-30.6638 3573.5382,-32.8387 3577.1576,-38.8303"/>
+<path fill="none" stroke="#191970" d="M2160.2706,-382.9137C2270.2127,-379.197 2515.5326,-367.4564 2719.0178,-333 2836.6736,-313.0772 2864.0169,-297.8529 2979.0178,-266 3079.9819,-238.035 3108.243,-239.1304 3205.0178,-199 3284.8634,-165.8898 3317.1378,-159.6214 3364.0178,-87 3373.2831,-72.6472 3380.0581,-54.6462 3384.5042,-40.297"/>
+<polygon fill="#191970" stroke="#191970" points="3387.8929,-41.1783 3387.3323,-30.5983 3381.1728,-39.2187 3387.8929,-41.1783"/>
 </g>
 <!-- Node41&#45;&gt;Node16 -->
 <g id="edge98" class="edge">
 <title>Node41&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2257.7615,-369.4169C2254.1667,-359.2765 2249.7945,-345.5246 2247.5,-333 2239.6367,-290.0772 2251.362,-277.5081 2241.5,-235 2223.2191,-156.2039 2237.6413,-113.5654 2171.5,-67 2130.5044,-38.1379 1359.8401,-19.8367 1191.7127,-16.1779"/>
-<polygon fill="#191970" stroke="#191970" points="1191.5971,-12.6747 1181.5239,-15.9582 1191.4461,-19.6731 1191.5971,-12.6747"/>
+<path fill="none" stroke="#191970" d="M2043.849,-370.8845C2040.5288,-370.2185 2037.2343,-369.5847 2034.0178,-369 1917.508,-347.8204 1887.0794,-350.8797 1770.0178,-333 1758.9894,-331.3155 1384.5191,-269.7664 1374.0178,-266 1347.969,-256.6573 1346.0131,-244.4906 1320.0178,-235 1235.5886,-204.1758 1178.6387,-267.9801 1121.0178,-199 1078.3451,-147.915 1140.4842,-67.3038 1171.8063,-32.466"/>
+<polygon fill="#191970" stroke="#191970" points="1174.4747,-34.7352 1178.6755,-25.0087 1169.3261,-29.9926 1174.4747,-34.7352"/>
 </g>
 <!-- Node41&#45;&gt;Node17 -->
 <g id="edge100" class="edge">
 <title>Node41&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M2321.8106,-378.631C2424.597,-368.1336 2634.0077,-346.0249 2706.5,-333 2829.8898,-310.8301 2868.3067,-321.9394 2980.5,-266 3107.2146,-202.8203 3228.0863,-78.4241 3269.9877,-32.8014"/>
-<polygon fill="#191970" stroke="#191970" points="3272.7342,-34.9836 3276.8762,-25.2319 3267.557,-30.2722 3272.7342,-34.9836"/>
+<path fill="none" stroke="#191970" d="M2160.4235,-380.8656C2269.2378,-373.6453 2498.8555,-356.1712 2575.0178,-333 2632.6756,-315.4585 2995.1167,-90.8643 3092.1484,-30.4035"/>
+<polygon fill="#191970" stroke="#191970" points="3094.057,-33.3382 3100.6909,-25.0773 3090.3534,-27.3982 3094.057,-33.3382"/>
 </g>
 <!-- Node41&#45;&gt;Node28 -->
 <g id="edge81" class="edge">
 <title>Node41&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2321.6053,-378.0588C2430.8843,-365.8257 2657.5285,-339.8399 2673.5,-333 2693.5359,-324.4195 2692.1054,-311.9455 2711.5,-302 2735.3945,-289.747 2801.3659,-273.247 2849.0933,-262.2605"/>
-<polygon fill="#191970" stroke="#191970" points="2850.0091,-265.6416 2858.9791,-260.0033 2848.4509,-258.8172 2850.0091,-265.6416"/>
+<path fill="none" stroke="#191970" d="M2160.2469,-383.2938C2261.0498,-380.2615 2474.1534,-369.6819 2650.0178,-333 2730.5073,-316.2115 2821.3387,-282.4859 2868.5674,-263.7727"/>
+<polygon fill="#191970" stroke="#191970" points="2869.9763,-266.9789 2877.9643,-260.0189 2867.3794,-260.4784 2869.9763,-266.9789"/>
 </g>
 <!-- Node41&#45;&gt;Node30 -->
 <g id="edge93" class="edge">
 <title>Node41&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M2321.7644,-369.8794C2369.9809,-357.7801 2437.6383,-340.8024 2482.7113,-329.4919"/>
-<polygon fill="#191970" stroke="#191970" points="2483.6849,-332.8562 2492.5323,-327.0275 2481.9811,-326.0667 2483.6849,-332.8562"/>
+<path fill="none" stroke="#191970" d="M2160.0791,-372.676C2220.4156,-360.3886 2314.2324,-341.283 2374.1304,-329.085"/>
+<polygon fill="#191970" stroke="#191970" points="2375.0718,-332.4652 2384.1723,-327.04 2373.6749,-325.606 2375.0718,-332.4652"/>
 </g>
 <!-- Node41&#45;&gt;Node40 -->
 <g id="edge95" class="edge">
 <title>Node41&#45;&gt;Node40</title>
-<path fill="none" stroke="#191970" d="M2303.573,-369.4039C2326.0358,-360.2121 2354.1454,-347.4676 2377.5,-333 2409.1615,-313.3865 2441.8366,-284.9146 2461.1494,-267.0673"/>
-<polygon fill="#191970" stroke="#191970" points="2463.6568,-269.5144 2468.5663,-260.1257 2458.8735,-264.4036 2463.6568,-269.5144"/>
+<path fill="none" stroke="#191970" d="M2160.0223,-380.5757C2255.6497,-373.52 2441.8978,-357.142 2503.0178,-333 2524.9301,-324.3448 2526.4409,-315.1075 2546.0178,-302 2565.2789,-289.1039 2587.6339,-275.2972 2604.2878,-265.2288"/>
+<polygon fill="#191970" stroke="#191970" points="2606.1328,-268.2034 2612.8975,-260.0493 2602.5243,-262.2051 2606.1328,-268.2034"/>
 </g>
 <!-- Node41&#45;&gt;Node42 -->
 <g id="edge75" class="edge">
 <title>Node41&#45;&gt;Node42</title>
-<path fill="none" stroke="#191970" d="M2205.3938,-370.4805C2202.0577,-369.9174 2198.7426,-369.4164 2195.5,-369 1692.0844,-304.3535 1555.4368,-393.8217 1047.7618,-332.9909"/>
-<polygon fill="#191970" stroke="#191970" points="1047.955,-329.4888 1037.6064,-331.76 1047.1126,-336.4379 1047.955,-329.4888"/>
+<path fill="none" stroke="#191970" d="M2043.8961,-370.5943C2040.5639,-370.0022 2037.254,-369.4638 2034.0178,-369 1885.1614,-347.6662 1435.7757,-327.6807 1258.2781,-320.4016"/>
+<polygon fill="#191970" stroke="#191970" points="1258.3871,-316.9032 1248.2526,-319.9921 1258.1014,-323.8974 1258.3871,-316.9032"/>
 </g>
 <!-- Node43 -->
 <g id="node35" class="node">
 <title>Node43</title>
 <g id="a_node35"><a xlink:href="runtime_2module_8h.html" target="_top" xlink:title="Runtime container of the functions generated by TVM, This is used to support dynamically link...">
-<polygon fill="#ffffff" stroke="#ff0000" points="2073.5,-308 2073.5,-327 2199.5,-327 2199.5,-308 2073.5,-308"/>
-<text text-anchor="middle" x="2136.5" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/module.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1996.0178,-308 1996.0178,-327 2122.0178,-327 2122.0178,-308 1996.0178,-308"/>
+<text text-anchor="middle" x="2059.0178" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/module.h</text>
 </a>
 </g>
 </g>
 <!-- Node41&#45;&gt;Node43 -->
 <g id="edge83" class="edge">
 <title>Node41&#45;&gt;Node43</title>
-<path fill="none" stroke="#191970" d="M2229.2582,-369.4639C2206.7802,-358.3234 2178.1687,-343.2212 2158.6105,-332.1691"/>
-<polygon fill="#191970" stroke="#191970" points="2160.2166,-329.0552 2149.803,-327.1039 2156.7268,-335.1233 2160.2166,-329.0552"/>
+<path fill="none" stroke="#191970" d="M2086.4996,-369.2967C2079.0056,-359.4658 2070.6295,-346.6026 2064.8675,-336.1545"/>
+<polygon fill="#191970" stroke="#191970" points="2067.8993,-334.3942 2060.2581,-327.055 2061.6547,-337.5574 2067.8993,-334.3942"/>
 </g>
 <!-- Node41&#45;&gt;Node45 -->
 <g id="edge96" class="edge">
 <title>Node41&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M2321.5921,-378.007C2350.9168,-374.9129 2387.0631,-371.3775 2419.5,-369 2835.9033,-338.4787 3343.3522,-322.2962 3475.0528,-318.4224"/>
-<polygon fill="#191970" stroke="#191970" points="3475.3651,-321.9149 3485.2588,-318.1249 3475.161,-314.9179 3475.3651,-321.9149"/>
+<path fill="none" stroke="#191970" d="M2160.2138,-381.425C2381.8335,-369.7148 3169.047,-328.1191 3337.9555,-319.1941"/>
+<polygon fill="#191970" stroke="#191970" points="3338.1567,-322.6885 3347.958,-318.6656 3337.7873,-315.6982 3338.1567,-322.6885"/>
 </g>
 <!-- Node46 -->
 <g id="node37" class="node">
 <title>Node46</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2256,-308 2256,-327 2297,-327 2297,-308 2256,-308"/>
-<text text-anchor="middle" x="2276.5" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tuple</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1936.5178,-308 1936.5178,-327 1977.5178,-327 1977.5178,-308 1936.5178,-308"/>
+<text text-anchor="middle" x="1957.0178" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tuple</text>
 </g>
 <!-- Node41&#45;&gt;Node46 -->
 <g id="edge99" class="edge">
 <title>Node41&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M2266.4499,-369.2967C2268.2984,-359.7699 2270.6994,-347.3954 2272.6911,-337.1306"/>
-<polygon fill="#191970" stroke="#191970" points="2276.1771,-337.5386 2274.6461,-327.055 2269.3053,-336.2052 2276.1771,-337.5386"/>
+<path fill="none" stroke="#191970" d="M2067.0613,-369.4511C2044.1489,-359.4389 2013.6234,-345.8181 1987.0178,-333 1986.0407,-332.5293 1985.0482,-332.0456 1984.0476,-331.5534"/>
+<polygon fill="#191970" stroke="#191970" points="1985.5265,-328.3795 1975.0208,-327.0083 1982.3784,-334.6316 1985.5265,-328.3795"/>
 </g>
 <!-- Node42&#45;&gt;Node22 -->
 <g id="edge76" class="edge">
 <title>Node42&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1037.6836,-303.7312C1040.6616,-303.1347 1043.6119,-302.5546 1046.5,-302 1140.7863,-283.8926 1166.3753,-289.3574 1259.5,-266 1377.2834,-236.4578 1402.0627,-213.4679 1518.5,-179 1565.0426,-165.2224 1619.0845,-151.6649 1655.5102,-142.8785"/>
-<polygon fill="#191970" stroke="#191970" points="1656.3887,-146.2672 1665.2961,-140.5306 1654.7555,-139.4603 1656.3887,-146.2672"/>
+<path fill="none" stroke="#191970" d="M1224.8919,-302.4516C1295.1812,-276.1243 1445.9224,-220.5561 1575.0178,-179 1614.3866,-166.3271 1659.7443,-153.3644 1691.9136,-144.4433"/>
+<polygon fill="#191970" stroke="#191970" points="1693.1626,-147.7295 1701.8704,-141.6943 1691.2996,-140.9819 1693.1626,-147.7295"/>
 </g>
 <!-- Node42&#45;&gt;Node18 -->
 <g id="edge78" class="edge">
 <title>Node42&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M911.0653,-311.0617C795.3314,-295.5868 551.5432,-244.9757 439.5,-87 428.9573,-72.1352 428.8408,-50.8778 430.6716,-35.4069"/>
-<polygon fill="#191970" stroke="#191970" points="434.1591,-35.7558 432.2276,-25.3385 427.2413,-34.6866 434.1591,-35.7558"/>
+<path fill="none" stroke="#191970" d="M1160.2814,-302.4268C1131.7414,-285.2718 1083.1904,-256.8085 1040.0178,-235 861.1138,-144.6274 804.9402,-148.0119 614.0178,-87 548.9407,-66.2037 473.2056,-40.7156 431.2275,-26.4739"/>
+<polygon fill="#191970" stroke="#191970" points="432.1517,-23.0915 421.5573,-23.1894 429.9004,-29.7196 432.1517,-23.0915"/>
 </g>
 <!-- Node42&#45;&gt;Node24 -->
 <g id="edge79" class="edge">
 <title>Node42&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1037.7034,-303.8311C1040.6762,-303.2079 1043.6199,-302.5948 1046.5,-302 1127.252,-285.3227 1155.1618,-303.6898 1228.5,-266 1246.3979,-256.802 1244.3667,-245.5541 1261.5,-235 1289.0684,-218.0178 1323.8597,-205.9131 1349.8952,-198.3972"/>
-<polygon fill="#191970" stroke="#191970" points="1351.0704,-201.7031 1359.7631,-195.6461 1349.1905,-194.9602 1351.0704,-201.7031"/>
+<path fill="none" stroke="#191970" d="M1213.3728,-302.2548C1259.8945,-277.242 1351.7904,-227.8335 1397.2537,-203.3899"/>
+<polygon fill="#191970" stroke="#191970" points="1399.067,-206.3888 1406.2172,-198.5706 1395.7521,-200.2235 1399.067,-206.3888"/>
 </g>
 <!-- Node42&#45;&gt;Node31 -->
 <g id="edge80" class="edge">
 <title>Node42&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1015.3444,-302.4639C1042.1799,-292.5849 1077.4782,-279.5905 1106.0629,-269.0675"/>
-<polygon fill="#191970" stroke="#191970" points="1107.3023,-272.3409 1115.4775,-265.6017 1104.884,-265.7719 1107.3023,-272.3409"/>
+<path fill="none" stroke="#191970" d="M1186.8331,-302.2967C1187.7741,-294.4156 1188.9479,-284.5857 1190.0192,-275.6135"/>
+<polygon fill="#191970" stroke="#191970" points="1193.4993,-275.9876 1191.2097,-265.6432 1186.5486,-275.1576 1193.4993,-275.9876"/>
 </g>
 <!-- Node42&#45;&gt;Node36 -->
 <g id="edge77" class="edge">
 <title>Node42&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M1037.6185,-303.3333C1040.6139,-302.8433 1043.5857,-302.3945 1046.5,-302 1324.4215,-264.3752 1399.6271,-310.6923 1676.5,-266 1767.2997,-251.3433 1870.8358,-219.5779 1924.9838,-201.7666"/>
-<polygon fill="#191970" stroke="#191970" points="1926.1145,-205.0791 1934.5055,-198.6105 1923.9121,-198.4346 1926.1145,-205.0791"/>
+<path fill="none" stroke="#191970" d="M1248.1886,-314.0267C1392.7901,-305.8022 1739.2435,-284.4429 1789.0178,-266 1811.1098,-257.8142 1811.8875,-247.2407 1832.0178,-235 1851.5439,-223.1267 1874.5175,-211.5308 1892.4963,-202.9518"/>
+<polygon fill="#191970" stroke="#191970" points="1894.3625,-205.9416 1901.9171,-198.5134 1891.3791,-199.6092 1894.3625,-205.9416"/>
 </g>
 <!-- Node43&#45;&gt;Node23 -->
 <g id="edge88" class="edge">
 <title>Node43&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M2115.5698,-307.9521C2093.2075,-297.7129 2056.7525,-280.9117 2025.5,-266 1980.3079,-244.4372 1928.1776,-218.6965 1896.8089,-203.1123"/>
-<polygon fill="#191970" stroke="#191970" points="1898.2835,-199.9367 1887.771,-198.6178 1895.1665,-206.2045 1898.2835,-199.9367"/>
+<path fill="none" stroke="#191970" d="M2018.2518,-307.9261C2007.7914,-305.7226 1996.5254,-303.5686 1986.0178,-302 1804.2235,-274.8611 1747.8909,-325.6139 1574.0178,-266 1549.9949,-257.7635 1549.9422,-243.5182 1526.0178,-235 1389.525,-186.4026 1344.8257,-223.4518 1202.0178,-199 1200.743,-198.7817 1199.4494,-198.5488 1198.145,-198.304"/>
+<polygon fill="#191970" stroke="#191970" points="1198.5661,-194.8178 1188.0678,-196.2447 1197.1646,-201.6761 1198.5661,-194.8178"/>
 </g>
 <!-- Node43&#45;&gt;Node20 -->
 <g id="edge91" class="edge">
 <title>Node43&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M2073.45,-315.5744C1861.2627,-308.9332 1180.963,-286.2798 1084.5,-266 1046.7782,-258.0696 1040.1054,-247.0779 1003.5,-235 958.4561,-220.1378 905.4902,-205.9387 871.9593,-197.3457"/>
-<polygon fill="#191970" stroke="#191970" points="872.7617,-193.9384 862.2074,-194.8644 871.0355,-200.7223 872.7617,-193.9384"/>
+<path fill="none" stroke="#191970" d="M2020.2017,-307.997C2009.2143,-305.6516 1997.2011,-303.4099 1986.0178,-302 1604.2623,-253.8722 1502.1471,-318.8578 1121.0178,-266 1013.5531,-251.096 889.528,-216.4905 831.3714,-199.2058"/>
+<polygon fill="#191970" stroke="#191970" points="832.21,-195.8034 821.6262,-196.2877 830.202,-202.5092 832.21,-195.8034"/>
 </g>
 <!-- Node43&#45;&gt;Node25 -->
 <g id="edge86" class="edge">
 <title>Node43&#45;&gt;Node25</title>
-<path fill="none" stroke="#191970" d="M2162.5931,-307.8589C2184.7997,-298.9414 2216.9128,-284.3147 2241.5,-266 2287.5016,-231.7341 2330.387,-178.7869 2351.2478,-151.1688"/>
-<polygon fill="#191970" stroke="#191970" points="2354.2863,-152.9486 2357.4507,-142.8373 2348.6715,-148.7683 2354.2863,-152.9486"/>
+<path fill="none" stroke="#191970" d="M2076.9061,-307.9746C2092.0589,-299.0354 2113.2932,-284.3134 2126.0178,-266 2150.6168,-230.5968 2160.8425,-179.9394 2164.7812,-152.4869"/>
+<polygon fill="#191970" stroke="#191970" points="2168.253,-152.9299 2166.0723,-142.5619 2161.3115,-152.0269 2168.253,-152.9299"/>
 </g>
 <!-- Node43&#45;&gt;Node10 -->
 <g id="edge87" class="edge">
 <title>Node43&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M2165.4479,-307.9237C2279.2112,-270.2895 2695.6864,-132.5147 2824.961,-89.7492"/>
-<polygon fill="#191970" stroke="#191970" points="2826.0747,-93.0673 2834.4694,-86.6037 2823.8761,-86.4215 2826.0747,-93.0673"/>
+<path fill="none" stroke="#191970" d="M2083.8448,-307.9747C2163.5235,-277.6652 2420.3514,-181.9884 2639.0178,-123 2691.5319,-108.8336 2752.0161,-96.6069 2796.7327,-88.3272"/>
+<polygon fill="#191970" stroke="#191970" points="2797.3935,-91.7645 2806.5977,-86.5172 2796.1302,-84.8794 2797.3935,-91.7645"/>
 </g>
 <!-- Node43&#45;&gt;Node11 -->
 <g id="edge84" class="edge">
 <title>Node43&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M2199.5918,-308.6048C2215.2433,-306.4216 2231.9788,-304.107 2247.5,-302 2368.7714,-285.5372 2400.0652,-287.7542 2520.5,-266 2721.3074,-229.7281 3327.7807,-83.6696 3536.7888,-32.9526"/>
-<polygon fill="#191970" stroke="#191970" points="3537.8422,-36.2986 3546.7344,-30.5384 3536.1909,-29.4962 3537.8422,-36.2986"/>
+<path fill="none" stroke="#191970" d="M2101.0819,-307.963C2291.0333,-264.8959 3070.1822,-88.242 3316.209,-32.4611"/>
+<polygon fill="#191970" stroke="#191970" points="3317.151,-35.8365 3326.1295,-30.2119 3315.6031,-29.0098 3317.151,-35.8365"/>
 </g>
 <!-- Node43&#45;&gt;Node16 -->
 <g id="edge89" class="edge">
 <title>Node43&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2073.2801,-313.8641C1940.1391,-305.9202 1634.7299,-286.1105 1532.5,-266 1434.7105,-246.763 1401.8587,-253.6275 1318.5,-199 1282.8611,-175.6448 1204.3518,-74.7387 1172.9085,-33.3327"/>
-<polygon fill="#191970" stroke="#191970" points="1175.6065,-31.0977 1166.7827,-25.2333 1170.0235,-35.3203 1175.6065,-31.0977"/>
+<path fill="none" stroke="#191970" d="M2017.8486,-307.9766C2007.497,-305.798 1996.3829,-303.643 1986.0178,-302 1835.4992,-278.1413 1793.2144,-301.5367 1645.0178,-266 1607.534,-257.0116 1601.2265,-245.0667 1564.0178,-235 1468.8014,-209.2394 1437.5936,-233.0514 1345.0178,-199 1299.7355,-182.3442 1284.7999,-178.4011 1252.0178,-143 1222.1411,-110.7365 1202.9228,-61.901 1193.9488,-34.9967"/>
+<polygon fill="#191970" stroke="#191970" points="1197.2034,-33.6832 1190.8219,-25.2258 1190.5365,-35.8168 1197.2034,-33.6832"/>
 </g>
 <!-- Node43&#45;&gt;Node33 -->
 <g id="edge85" class="edge">
 <title>Node43&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M2073.3835,-309.5511C1974.295,-297.0719 1782.7303,-272.9463 1677.7293,-259.7225"/>
-<polygon fill="#191970" stroke="#191970" points="1677.8934,-256.2156 1667.5344,-258.4385 1677.0186,-263.1607 1677.8934,-256.2156"/>
+<path fill="none" stroke="#191970" d="M2014.6527,-307.9313C2005.2083,-305.936 1995.2888,-303.8715 1986.0178,-302 1919.7615,-288.625 1844.144,-274.2243 1789.9909,-264.0611"/>
+<polygon fill="#191970" stroke="#191970" points="1790.6099,-260.6163 1780.1362,-262.214 1789.3202,-267.4965 1790.6099,-260.6163"/>
 </g>
 <!-- Node43&#45;&gt;Node36 -->
 <g id="edge90" class="edge">
 <title>Node43&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M2123.4217,-307.8416C2093.6948,-285.8881 2020.8,-232.0549 1984.2037,-205.0283"/>
-<polygon fill="#191970" stroke="#191970" points="1985.8535,-201.8957 1975.73,-198.7705 1981.695,-207.5266 1985.8535,-201.8957"/>
+<path fill="none" stroke="#191970" d="M2055.6632,-307.9386C2049.3997,-291.2611 2034.4925,-256.576 2012.0178,-235 1997.3023,-220.873 1977.3905,-210.0354 1960.132,-202.4382"/>
+<polygon fill="#191970" stroke="#191970" points="1961.4421,-199.1924 1950.8668,-198.5507 1958.7337,-205.6472 1961.4421,-199.1924"/>
 </g>
 <!-- Node43&#45;&gt;Node41 -->
 <g id="edge92" class="edge">
 <title>Node43&#45;&gt;Node41</title>
-<path fill="none" stroke="#191970" d="M2159.6059,-327.1039C2179.9296,-336.8322 2209.4433,-352.1681 2231.901,-364.5409"/>
-<polygon fill="#191970" stroke="#191970" points="2230.2999,-367.6554 2240.7392,-369.4639 2233.7062,-361.5401 2230.2999,-367.6554"/>
+<path fill="none" stroke="#191970" d="M2070.042,-327.055C2077.3113,-335.7473 2086.3187,-348.9456 2093.1176,-360.5001"/>
+<polygon fill="#191970" stroke="#191970" points="2090.095,-362.2663 2098.0212,-369.2967 2096.2092,-358.8579 2090.095,-362.2663"/>
 </g>
 <!-- Node51&#45;&gt;Node3 -->
 <g id="edge134" class="edge">
 <title>Node51&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1122.5068,-677.9466C1156.6895,-675.8628 1203.3666,-673.1088 1244.5,-671 1626.6932,-651.4058 2087.8587,-632.8709 2238.3701,-626.9528"/>
-<polygon fill="#191970" stroke="#191970" points="2238.8098,-630.4383 2248.6647,-626.5486 2238.5351,-623.4437 2238.8098,-630.4383"/>
+<path fill="none" stroke="#191970" d="M1134.1949,-667.485C1315.8546,-658.1946 1947.9701,-625.8673 2130.3834,-616.5384"/>
+<polygon fill="#191970" stroke="#191970" points="2130.6836,-620.0277 2140.4918,-616.0214 2130.326,-613.0368 2130.6836,-620.0277"/>
 </g>
 <!-- Node51&#45;&gt;Node21 -->
 <g id="edge135" class="edge">
 <title>Node51&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1056.4433,-671.377C1044.1911,-663.4014 1028.6292,-650.8248 1021.5,-635 972.9406,-527.211 960.5802,-470.3177 1021.5,-369 1029.9211,-354.9946 1043.5767,-344.5945 1058.0176,-336.9554"/>
-<polygon fill="#191970" stroke="#191970" points="1059.6116,-340.0722 1067.0709,-332.5483 1056.5477,-333.7783 1059.6116,-340.0722"/>
+<path fill="none" stroke="#191970" d="M1072.0553,-660.2062C1042.3738,-636.555 972.0178,-573.0042 972.0178,-502 972.0178,-502 972.0178,-502 972.0178,-446 972.0178,-410.154 970.9022,-396.5906 948.0178,-369 936.7232,-355.3827 920.8729,-344.871 905.2531,-337.0025"/>
+<polygon fill="#191970" stroke="#191970" points="906.6545,-333.7935 896.1192,-332.6716 903.6554,-340.1185 906.6545,-333.7935"/>
 </g>
 <!-- Node51&#45;&gt;Node16 -->
 <g id="edge138" class="edge">
 <title>Node51&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1024.4552,-672.9684C877.8024,-647.857 447.82,-565.8981 367.5,-456 362.255,-448.8235 366.3376,-444.8126 367.5,-436 390.651,-260.488 355.5297,-138.4567 517.5,-67 573.6078,-42.2468 1005.0125,-22.0985 1127.2903,-16.8405"/>
-<polygon fill="#191970" stroke="#191970" points="1127.6563,-20.3282 1137.4981,-16.4054 1127.3581,-13.3345 1127.6563,-20.3282"/>
+<path fill="none" stroke="#191970" d="M1035.9586,-663.9795C957.3332,-652.9771 799.5987,-625.2813 678.0178,-568 640.0623,-550.1178 630.8076,-542.5212 602.0178,-512 504.4626,-408.578 383.606,-364.6194 442.0178,-235 488.3292,-132.2322 528.5582,-111.7446 632.0178,-67 727.1909,-25.8391 1051.5702,-17.5074 1155.6161,-15.8805"/>
+<polygon fill="#191970" stroke="#191970" points="1155.8668,-19.3774 1165.8147,-15.7319 1155.7647,-12.3781 1155.8668,-19.3774"/>
 </g>
 <!-- Node51&#45;&gt;Node17 -->
 <g id="edge139" class="edge">
 <title>Node51&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M1122.8704,-677.7789C1157.0162,-675.6331 1203.504,-672.8735 1244.5,-671 1719.5549,-649.2896 2924.3215,-706.3617 3382.5,-579 3443.6079,-562.0136 3471.4384,-563.4699 3508.5,-512 3563.592,-435.4901 3571.862,-390.1809 3538.5,-302 3490.8876,-176.1529 3359.9745,-69.9226 3307.5912,-31.1865"/>
-<polygon fill="#191970" stroke="#191970" points="3309.4607,-28.2179 3299.3196,-25.1506 3305.3345,-33.8725 3309.4607,-28.2179"/>
+<path fill="none" stroke="#191970" d="M1134.1134,-669.1769C1426.6719,-664.057 2934.0428,-634.5915 3128.0178,-568 3292.8195,-511.4236 3484.5455,-462.2013 3416.0178,-302 3358.6331,-167.8484 3204.181,-66.5921 3142.242,-30.1862"/>
+<polygon fill="#191970" stroke="#191970" points="3143.8671,-27.0828 3133.4601,-25.0965 3140.3571,-33.1391 3143.8671,-27.0828"/>
 </g>
 <!-- Node51&#45;&gt;Node33 -->
 <g id="edge137" class="edge">
 <title>Node51&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M1073.5,-671.2511C1073.5,-651.1182 1073.5,-603.4826 1073.5,-563.5 1073.5,-563.5 1073.5,-563.5 1073.5,-446 1073.5,-434.0412 1222.6294,-306.9841 1233.5,-302 1348.3538,-249.3404 1390.5626,-284.8441 1515.5,-266 1520.6033,-265.2303 1525.8651,-264.4023 1531.1604,-263.5439"/>
-<polygon fill="#191970" stroke="#191970" points="1532.0551,-266.9437 1541.352,-261.8626 1530.9157,-260.037 1532.0551,-266.9437"/>
+<path fill="none" stroke="#191970" d="M1089.1729,-660.4879C1112.5457,-607.4254 1229.1105,-348.7062 1300.0178,-302 1354.9193,-265.8367 1539.2358,-255.0671 1643.5096,-251.86"/>
+<polygon fill="#191970" stroke="#191970" points="1643.7588,-255.3543 1653.6524,-251.5641 1643.5546,-248.3573 1643.7588,-255.3543"/>
 </g>
 <!-- Node51&#45;&gt;Node42 -->
 <g id="edge136" class="edge">
 <title>Node51&#45;&gt;Node42</title>
-<path fill="none" stroke="#191970" d="M1055.1311,-671.4499C1041.0615,-663.3519 1022.0034,-650.5812 1009.5,-635 985.1574,-604.6654 948.1202,-472.5295 945.5,-456 939.4463,-417.8102 935.6932,-406.4024 945.5,-369 948.0228,-359.3784 952.7931,-349.6592 957.7472,-341.2958"/>
-<polygon fill="#191970" stroke="#191970" points="960.7183,-343.1459 963.0974,-332.8216 954.7993,-339.4089 960.7183,-343.1459"/>
+<path fill="none" stroke="#191970" d="M1085.1611,-660.4119C1085.4376,-641.0545 1086.0178,-595.9182 1086.0178,-558 1086.0178,-558 1086.0178,-558 1086.0178,-446 1086.0178,-400.8879 1124.7804,-361.919 1153.7955,-339.0685"/>
+<polygon fill="#191970" stroke="#191970" points="1156.2178,-341.6234 1162.0564,-332.7824 1151.9789,-336.0527 1156.2178,-341.6234"/>
 </g>
 <!-- Node54&#45;&gt;Node18 -->
 <g id="edge144" class="edge">
 <title>Node54&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M509.4711,-436.4936C486.2445,-411.2625 424.8907,-339.494 405.5,-266 383.3073,-181.8861 413.0372,-77.4313 427.457,-34.8875"/>
-<polygon fill="#191970" stroke="#191970" points="430.8338,-35.8339 430.8303,-25.239 424.226,-33.5236 430.8338,-35.8339"/>
+<path fill="none" stroke="#191970" d="M391.8676,-436.3293C382.9507,-427.8007 371.1257,-414.5 366.0178,-400 319.0684,-266.7221 371.8765,-92.0726 391.9368,-34.681"/>
+<polygon fill="#191970" stroke="#191970" points="395.2957,-35.6827 395.3719,-25.0881 388.7055,-33.3228 395.2957,-35.6827"/>
 </g>
 <!-- Node54&#45;&gt;Node20 -->
 <g id="edge145" class="edge">
 <title>Node54&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M518.8497,-436.1487C520.6185,-402.2508 531.2948,-289.5182 595.5,-235 626.6071,-208.5862 745.4956,-196.0364 804.6547,-191.3249"/>
-<polygon fill="#191970" stroke="#191970" points="804.9296,-194.8142 814.6327,-190.5598 804.3944,-187.8347 804.9296,-194.8142"/>
+<path fill="none" stroke="#191970" d="M406.8168,-436.465C420.8263,-402.642 473.1204,-287.6245 556.0178,-235 590.1777,-213.3148 706.0542,-198.555 764.1825,-192.3304"/>
+<polygon fill="#191970" stroke="#191970" points="764.7504,-195.7902 774.3315,-191.2677 764.0214,-188.8282 764.7504,-195.7902"/>
 </g>
 <!-- Node54&#45;&gt;Node16 -->
 <g id="edge143" class="edge">
 <title>Node54&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M513.8942,-436.3739C498.6865,-404.3916 450.6373,-301.6865 443.5,-266 423.6811,-166.9053 465.1082,-115.9839 553.5,-67 603.8767,-39.0828 1008.5878,-21.3429 1127.0414,-16.7115"/>
-<polygon fill="#191970" stroke="#191970" points="1127.4214,-20.1996 1137.2788,-16.3162 1127.1512,-13.2048 1127.4214,-20.1996"/>
+<path fill="none" stroke="#191970" d="M400.3653,-436.1554C392.4996,-404.9424 371.7919,-306.0612 404.0178,-235 450.5726,-132.3422 490.4271,-111.4404 594.0178,-67 645.8495,-44.7642 1039.9583,-23.0622 1155.9801,-17.1016"/>
+<polygon fill="#191970" stroke="#191970" points="1156.2056,-20.5947 1166.0143,-16.5898 1155.849,-13.6038 1156.2056,-20.5947"/>
 </g>
 <!-- Node54&#45;&gt;Node41 -->
 <g id="edge142" class="edge">
 <title>Node54&#45;&gt;Node41</title>
-<path fill="none" stroke="#191970" d="M589.1298,-444.883C887.2637,-440.0562 2034.814,-420.3163 2195.5,-400 2195.6014,-399.9872 2195.7028,-399.9743 2195.8043,-399.9613"/>
-<polygon fill="#191970" stroke="#191970" points="2196.0195,-403.4679 2205.3957,-398.5347 2194.9896,-396.5441 2196.0195,-403.4679"/>
+<path fill="none" stroke="#191970" d="M473.7561,-445.8483C745.8889,-444.9788 1725.4214,-439.1386 2034.0178,-400 2034.1191,-399.9871 2034.2206,-399.9742 2034.3221,-399.9612"/>
+<polygon fill="#191970" stroke="#191970" points="2034.5384,-403.4677 2043.9131,-398.5317 2033.5064,-396.5442 2034.5384,-403.4677"/>
 </g>
 <!-- Node58&#45;&gt;Node1 -->
 <g id="edge156" class="edge">
 <title>Node58&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M1897.0745,-846.826C1787.8798,-842.9216 1593.3476,-832.1748 1429.5,-803 1391.6198,-796.255 1290.0994,-766.7464 1232.2912,-749.5553"/>
-<polygon fill="#191970" stroke="#191970" points="1232.9577,-746.1018 1222.3746,-746.5995 1230.9582,-752.8102 1232.9577,-746.1018"/>
+<path fill="none" stroke="#191970" d="M1494.1035,-839.4507C1398.9645,-817.2456 1163.0541,-762.1849 1058.8661,-737.8678"/>
+<polygon fill="#191970" stroke="#191970" points="1059.4398,-734.4077 1048.906,-735.5432 1057.8488,-741.2245 1059.4398,-734.4077"/>
 </g>
 <!-- Node58&#45;&gt;Node3 -->
 <g id="edge155" class="edge">
 <title>Node58&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M2027.47,-839.4776C2100.68,-822.9769 2240.4102,-786.9793 2269.5,-747 2291.5822,-716.6515 2292.3462,-670.7472 2290.6234,-644.7621"/>
-<polygon fill="#191970" stroke="#191970" points="2294.0979,-644.3141 2289.7561,-634.6497 2287.1235,-644.9123 2294.0979,-644.3141"/>
+<path fill="none" stroke="#191970" d="M1620.175,-842.5677C1808.1287,-827.4864 2245.179,-787.2179 2287.0178,-736 2319.6602,-696.04 2250.8304,-650.8133 2209.0122,-628.3085"/>
+<polygon fill="#191970" stroke="#191970" points="2210.5247,-625.1496 2200.0432,-623.6044 2207.2734,-631.3487 2210.5247,-625.1496"/>
 </g>
 <!-- Node58&#45;&gt;Node6 -->
-<g id="edge162" class="edge">
+<g id="edge163" class="edge">
 <title>Node58&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M2067.7799,-841.7984C2264.9251,-822.8039 2735.5,-763.0105 2735.5,-625 2735.5,-625 2735.5,-625 2735.5,-563.5 2735.5,-519.8161 2775.0725,-482.1453 2801.4428,-461.8918"/>
-<polygon fill="#191970" stroke="#191970" points="2803.8453,-464.4682 2809.8035,-455.7074 2799.6825,-458.8405 2803.8453,-464.4682"/>
+<path fill="none" stroke="#191970" d="M1620.3112,-848.6266C1995.1919,-846.7865 3479.4673,-837.3398 3513.0178,-803 3550.7944,-764.3347 3518.4919,-751.824 3346.0178,-604 3265.4049,-534.9083 3237.6906,-519.0257 3135.0178,-492 3027.7042,-463.7527 2898.954,-452.8102 2820.8735,-448.599"/>
+<polygon fill="#191970" stroke="#191970" points="2820.8369,-445.0926 2810.6695,-448.0716 2820.4755,-452.0832 2820.8369,-445.0926"/>
 </g>
 <!-- Node58&#45;&gt;Node21 -->
-<g id="edge163" class="edge">
+<g id="edge164" class="edge">
 <title>Node58&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1897.49,-844.8572C1761.0939,-837.6967 1504.8234,-821.9384 1467.5,-803 1460.8563,-799.6289 1116.1271,-406.9714 1113.5,-400 1106.6098,-381.7156 1108.4644,-359.5403 1111.7102,-342.7869"/>
-<polygon fill="#191970" stroke="#191970" points="1115.2089,-343.1775 1113.9661,-332.6558 1108.3762,-341.656 1115.2089,-343.1775"/>
+<path fill="none" stroke="#191970" d="M1449.6316,-846.1971C1225.2261,-837.7006 636.0178,-807.6205 636.0178,-726 636.0178,-726 636.0178,-726 636.0178,-670 636.0178,-558.5826 804.3428,-661.4472 865.0178,-568 910.9832,-497.2075 879.339,-390.056 861.2422,-342.1531"/>
+<polygon fill="#191970" stroke="#191970" points="864.4109,-340.6453 857.5134,-332.6032 857.8903,-343.1913 864.4109,-340.6453"/>
 </g>
 <!-- Node58&#45;&gt;Node10 -->
-<g id="edge165" class="edge">
+<g id="edge166" class="edge">
 <title>Node58&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M2067.5021,-846.9525C2310.5419,-840.9261 3005.4485,-822.4504 3232.5,-803 3417.9369,-787.1145 3525.0113,-887.1275 3647.5,-747 3781.5115,-593.6904 3679.7678,-448.6504 3538.5,-302 3393.4599,-151.4336 3303.8398,-182.7562 3103.5,-123 3047.326,-106.2447 2981.875,-94.1759 2933.5254,-86.6156"/>
-<polygon fill="#191970" stroke="#191970" points="2933.893,-83.1311 2923.4772,-85.0707 2932.8292,-90.0498 2933.893,-83.1311"/>
+<path fill="none" stroke="#191970" d="M1620.1249,-848.0163C1995.9785,-843.5509 3490.4808,-824.4752 3534.0178,-803 3630.6916,-755.3144 3686.0178,-721.795 3686.0178,-614 3686.0178,-614 3686.0178,-614 3686.0178,-558 3686.0178,-517.5506 3658.5845,-405.4674 3565.0178,-302 3434.6542,-157.842 3354.8686,-168.952 3166.0178,-123 3086.7954,-103.7233 2993.8844,-91.0736 2931.8239,-84.0054"/>
+<polygon fill="#191970" stroke="#191970" points="2932.0813,-80.5124 2921.7537,-82.8767 2931.3015,-87.4689 2932.0813,-80.5124"/>
 </g>
 <!-- Node58&#45;&gt;Node33 -->
-<g id="edge164" class="edge">
+<g id="edge165" class="edge">
 <title>Node58&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M1897.2435,-840.67C1823.3058,-832.5604 1722.5141,-819.1204 1685.5,-803 1538.1128,-738.81 1471.6389,-723.6509 1401.5,-579 1342.3131,-456.9359 1503.5151,-322.6027 1573.6868,-271.6555"/>
-<polygon fill="#191970" stroke="#191970" points="1576.0736,-274.2512 1582.1665,-265.5836 1571.9983,-268.5598 1576.0736,-274.2512"/>
+<path fill="none" stroke="#191970" d="M1505.4493,-839.3976C1488.607,-832.2338 1468.9384,-820.6009 1459.0178,-803 1450.2464,-787.438 1336.5405,-460.3329 1468.0178,-302 1490.1023,-275.4045 1578.365,-262.0748 1643.5639,-255.7336"/>
+<polygon fill="#191970" stroke="#191970" points="1644.1855,-259.1908 1653.8162,-254.7749 1643.5338,-252.2212 1644.1855,-259.1908"/>
 </g>
 <!-- Node58&#45;&gt;Node41 -->
-<g id="edge166" class="edge">
+<g id="edge167" class="edge">
 <title>Node58&#45;&gt;Node41</title>
-<path fill="none" stroke="#191970" d="M2024.219,-839.4948C2072.2185,-825.7992 2144.5,-795.4685 2144.5,-737 2144.5,-737 2144.5,-737 2144.5,-681 2144.5,-596.6609 2126.7692,-568.8561 2161.5,-492 2177.5164,-456.5572 2210.1865,-425.4865 2234.256,-405.9839"/>
-<polygon fill="#191970" stroke="#191970" points="2236.6885,-408.5229 2242.3695,-399.5799 2232.3516,-403.0282 2236.6885,-408.5229"/>
+<path fill="none" stroke="#191970" d="M1553.9265,-839.4573C1569.7482,-831.0068 1592.4838,-817.7489 1610.0178,-803 1674.8415,-748.473 1677.6782,-721.4149 1736.0178,-660 1809.6139,-582.5243 1822.295,-555.8002 1908.0178,-492 1956.7351,-455.7416 2019.0576,-423.3576 2059.7311,-403.8359"/>
+<polygon fill="#191970" stroke="#191970" points="2061.2967,-406.9671 2068.8252,-399.5124 2058.2911,-400.6452 2061.2967,-406.9671"/>
 </g>
 <!-- Node59 -->
 <g id="node42" class="node">
 <title>Node59</title>
 <g id="a_node42"><a xlink:href="arg__info_8h.html" target="_top" xlink:title="tvm/meta_schedule/arg\l_info.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="3049.5,-548.5 3049.5,-578.5 3181.5,-578.5 3181.5,-548.5 3049.5,-548.5"/>
-<text text-anchor="start" x="3057.5" y="-566.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/meta_schedule/arg</text>
-<text text-anchor="middle" x="3115.5" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_info.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="3372.0178,-772.5 3372.0178,-802.5 3504.0178,-802.5 3504.0178,-772.5 3372.0178,-772.5"/>
+<text text-anchor="start" x="3380.0178" y="-790.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/meta_schedule/arg</text>
+<text text-anchor="middle" x="3438.0178" y="-779.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_info.h</text>
 </a>
 </g>
 </g>
 <!-- Node58&#45;&gt;Node59 -->
 <g id="edge157" class="edge">
 <title>Node58&#45;&gt;Node59</title>
-<path fill="none" stroke="#191970" d="M2067.6332,-847.1887C2386.9817,-839.8421 3495.5,-809.1448 3495.5,-737 3495.5,-737 3495.5,-737 3495.5,-681 3495.5,-618.0542 3302.7839,-585.1714 3191.8346,-571.4561"/>
-<polygon fill="#191970" stroke="#191970" points="3192.0393,-567.9554 3181.6909,-570.2273 3191.1974,-574.9046 3192.0393,-567.9554"/>
+<path fill="none" stroke="#191970" d="M1620.1117,-848.7292C1932.3463,-847.4433 3014.6574,-840.3331 3358.0178,-803 3359.2836,-802.8624 3360.5592,-802.7153 3361.8425,-802.5595"/>
+<polygon fill="#191970" stroke="#191970" points="3362.3537,-806.0223 3371.7925,-801.21 3361.4128,-799.0858 3362.3537,-806.0223"/>
 </g>
 <!-- Node58&#45;&gt;Node65 -->
-<g id="edge167" class="edge">
+<g id="edge168" class="edge">
 <title>Node58&#45;&gt;Node65</title>
-<path fill="none" stroke="#191970" d="M1897.3194,-842.7868C1782.4305,-834.3113 1571.4562,-818.4116 1391.5,-803 1378.0939,-801.8519 1363.7496,-800.5571 1350.0595,-799.2893"/>
-<polygon fill="#191970" stroke="#191970" points="1350.0263,-795.7711 1339.7446,-798.3279 1349.3766,-802.7409 1350.0263,-795.7711"/>
+<path fill="none" stroke="#191970" d="M1449.9362,-846.5073C1288.3712,-841.3759 926.1109,-827.9644 622.0178,-803 598.9117,-801.1031 573.6655,-798.401 551.266,-795.7828"/>
+<polygon fill="#191970" stroke="#191970" points="551.535,-792.2902 541.1925,-794.5889 550.7111,-799.2416 551.535,-792.2902"/>
 </g>
 <!-- Node74 -->
 <g id="node46" class="node">
 <title>Node74</title>
 <g id="a_node46"><a xlink:href="trace_8h.html" target="_top" xlink:title="tvm/tir/schedule/trace.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="1916,-783.5 1916,-802.5 2049,-802.5 2049,-783.5 1916,-783.5"/>
-<text text-anchor="middle" x="1982.5" y="-790.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/schedule/trace.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1468.5178,-778 1468.5178,-797 1601.5178,-797 1601.5178,-778 1468.5178,-778"/>
+<text text-anchor="middle" x="1535.0178" y="-785" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/schedule/trace.h</text>
 </a>
 </g>
 </g>
 <!-- Node58&#45;&gt;Node74 -->
-<g id="edge182" class="edge">
+<g id="edge183" class="edge">
 <title>Node58&#45;&gt;Node74</title>
-<path fill="none" stroke="#191970" d="M1982.5,-839.2455C1982.5,-831.9382 1982.5,-821.6944 1982.5,-812.7046"/>
-<polygon fill="#191970" stroke="#191970" points="1986.0001,-812.6426 1982.5,-802.6427 1979.0001,-812.6427 1986.0001,-812.6426"/>
+<path fill="none" stroke="#191970" d="M1535.0178,-839.3906C1535.0178,-830.8657 1535.0178,-818.1392 1535.0178,-807.4235"/>
+<polygon fill="#191970" stroke="#191970" points="1538.5179,-807.2448 1535.0178,-797.2449 1531.5179,-807.2449 1538.5179,-807.2448"/>
 </g>
-<!-- Node59&#45;&gt;Node5 -->
+<!-- Node59&#45;&gt;Node1 -->
 <g id="edge158" class="edge">
+<title>Node59&#45;&gt;Node1</title>
+<path fill="none" stroke="#191970" d="M3371.7609,-785.8231C3022.0412,-776.9722 1394.4138,-735.7792 1065.9017,-727.465"/>
+<polygon fill="#191970" stroke="#191970" points="1065.7482,-723.9601 1055.6628,-727.2058 1065.571,-730.9578 1065.7482,-723.9601"/>
+</g>
+<!-- Node59&#45;&gt;Node5 -->
+<g id="edge159" class="edge">
 <title>Node59&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M3049.325,-558.9528C2881.443,-547.4168 2440.9205,-517.1462 2280.2455,-506.1054"/>
-<polygon fill="#191970" stroke="#191970" points="2280.2891,-502.6002 2270.0727,-505.4064 2279.8092,-509.5838 2280.2891,-502.6002"/>
+<path fill="none" stroke="#191970" d="M3426.9466,-772.2068C3402.7712,-739.3596 3342.5604,-660.472 3282.0178,-604 3251.3362,-575.3813 3245.5519,-562.0516 3206.0178,-548 3121.772,-518.0566 2504.5457,-505.978 2307.973,-502.8647"/>
+<polygon fill="#191970" stroke="#191970" points="2307.8844,-499.363 2297.8309,-502.7062 2307.7749,-506.3622 2307.8844,-499.363"/>
 </g>
 <!-- Node59&#45;&gt;Node6 -->
-<g id="edge159" class="edge">
+<g id="edge160" class="edge">
 <title>Node59&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M3078.3491,-548.4992C3021.2282,-525.4349 2912.709,-481.617 2857.9227,-459.4954"/>
-<polygon fill="#191970" stroke="#191970" points="2858.9258,-456.126 2848.3427,-455.6272 2856.3049,-462.6168 2858.9258,-456.126"/>
+<path fill="none" stroke="#191970" d="M3432.4102,-772.3893C3426.5723,-757.4918 3416.6625,-734.3557 3405.0178,-716 3348.6199,-627.0993 3331.804,-595.8351 3238.0178,-548 3101.1832,-478.2082 2919.5431,-456.162 2820.9797,-449.2038"/>
+<polygon fill="#191970" stroke="#191970" points="2821.0459,-445.7004 2810.8332,-448.5203 2820.5753,-452.6845 2821.0459,-445.7004"/>
 </g>
 <!-- Node59&#45;&gt;Node10 -->
-<g id="edge161" class="edge">
+<g id="edge162" class="edge">
 <title>Node59&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M3135.6552,-548.4562C3146.707,-539.15 3159.707,-526.275 3167.5,-512 3226.6154,-403.7146 3256.6287,-335.8021 3185.5,-235 3136.9133,-166.1439 3108.4258,-159.5664 3032.5,-123 2999.4792,-107.097 2960.0891,-95.9719 2927.6473,-88.651"/>
-<polygon fill="#191970" stroke="#191970" points="2928.3341,-85.2185 2917.817,-86.5 2926.8378,-92.0567 2928.3341,-85.2185"/>
+<path fill="none" stroke="#191970" d="M3465.0417,-772.2769C3521.7594,-739.3986 3648.0178,-660.9371 3648.0178,-614 3648.0178,-614 3648.0178,-614 3648.0178,-558 3648.0178,-223.6209 3124.0823,-114.6198 2931.7417,-85.8706"/>
+<polygon fill="#191970" stroke="#191970" points="2932.1428,-82.392 2921.7407,-84.4036 2931.1268,-89.3179 2932.1428,-82.392"/>
 </g>
 <!-- Node59&#45;&gt;Node28 -->
-<g id="edge160" class="edge">
+<g id="edge161" class="edge">
 <title>Node59&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M3113.7438,-548.4104C3107.9079,-504.701 3085.3899,-376.3912 3015.5,-302 2998.2045,-283.5906 2973.0715,-271.2448 2950.8975,-263.2684"/>
-<polygon fill="#191970" stroke="#191970" points="2951.8771,-259.9049 2941.2828,-260.0089 2949.6296,-266.5343 2951.8771,-259.9049"/>
+<path fill="none" stroke="#191970" d="M3442.8878,-772.11C3462.999,-705.2182 3532.53,-437.0544 3401.0178,-302 3372.2777,-272.4858 3113.8949,-258.4918 2980.2842,-253.1802"/>
+<polygon fill="#191970" stroke="#191970" points="2980.231,-249.6756 2970.102,-252.7827 2979.9578,-256.6702 2980.231,-249.6756"/>
 </g>
 <!-- Node65&#45;&gt;Node1 -->
-<g id="edge169" class="edge">
+<g id="edge170" class="edge">
 <title>Node65&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M1268.1264,-783.2455C1253.5692,-774.5731 1232.069,-761.7645 1215.3848,-751.825"/>
-<polygon fill="#191970" stroke="#191970" points="1217.0683,-748.7539 1206.6859,-746.6427 1213.4856,-754.7676 1217.0683,-748.7539"/>
+<path fill="none" stroke="#191970" d="M541.1472,-778.2972C555.1142,-776.0988 570.1021,-773.8542 584.0178,-772 714.5802,-754.6031 868.4271,-739.1719 950.1534,-731.3751"/>
+<polygon fill="#191970" stroke="#191970" points="950.6123,-734.8473 960.2364,-730.417 949.9501,-727.8787 950.6123,-734.8473"/>
 </g>
 <!-- Node65&#45;&gt;Node3 -->
-<g id="edge168" class="edge">
+<g id="edge169" class="edge">
 <title>Node65&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1339.5718,-791.9354C1485.5776,-788.7238 1880.0287,-777.4097 2006.5,-747 2106.1787,-723.0325 2215.1283,-666.2923 2263.25,-639.4945"/>
-<polygon fill="#191970" stroke="#191970" points="2265.0399,-642.5034 2272.0426,-634.5527 2261.6102,-636.4012 2265.0399,-642.5034"/>
+<path fill="none" stroke="#191970" d="M541.2798,-787.1225C770.9185,-785.3072 1650.5661,-775.8424 1925.0178,-736 2023.5887,-721.6904 2062.7531,-740.3166 2142.0178,-680 2157.4274,-668.274 2167.6425,-648.3641 2173.5632,-633.4717"/>
+<polygon fill="#191970" stroke="#191970" points="2176.9877,-634.2972 2177.1282,-623.7033 2170.4119,-631.8973 2176.9877,-634.2972"/>
 </g>
 <!-- Node65&#45;&gt;Node5 -->
-<g id="edge170" class="edge">
+<g id="edge171" class="edge">
 <title>Node65&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M1339.6131,-791.8485C1461.3482,-788.7571 1749.6464,-778.2706 1841.5,-747 1996.419,-694.2594 2150.8408,-564.4843 2202.4466,-518.4901"/>
-<polygon fill="#191970" stroke="#191970" points="2204.9962,-520.9044 2210.0917,-511.6153 2200.3156,-515.6994 2204.9962,-520.9044"/>
+<path fill="none" stroke="#191970" d="M541.3724,-786.4379C842.5918,-780.5958 2271.4172,-752.0531 2287.0178,-736 2345.3134,-676.0136 2286.5646,-563.927 2259.7942,-520.1633"/>
+<polygon fill="#191970" stroke="#191970" points="2262.7538,-518.2947 2254.4768,-511.6809 2256.8228,-522.0127 2262.7538,-518.2947"/>
 </g>
 <!-- Node65&#45;&gt;Node20 -->
-<g id="edge181" class="edge">
+<g id="edge182" class="edge">
 <title>Node65&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1229.2336,-792.4385C1065.0074,-790.3902 581.8611,-781.5145 428.5,-747 353.0525,-730.0203 267.5,-758.3346 267.5,-681 267.5,-681 267.5,-681 267.5,-502 267.5,-255.6411 681.2726,-201.8997 804.3607,-191.363"/>
-<polygon fill="#191970" stroke="#191970" points="805.0084,-194.822 814.6918,-190.523 804.441,-187.845 805.0084,-194.822"/>
+<path fill="none" stroke="#191970" d="M467.554,-777.9659C437.1843,-760.8077 380.0178,-721.6502 380.0178,-670 380.0178,-670 380.0178,-670 380.0178,-558 380.0178,-498.1516 301.1901,-491.7259 323.0178,-436 336.9551,-400.4183 461.8298,-263.1563 516.0178,-235 558.7568,-212.7926 698.9196,-197.8077 764.112,-191.8661"/>
+<polygon fill="#191970" stroke="#191970" points="764.6242,-195.3344 774.273,-190.9582 764.0011,-188.3622 764.6242,-195.3344"/>
 </g>
 <!-- Node65&#45;&gt;Node16 -->
-<g id="edge179" class="edge">
+<g id="edge180" class="edge">
 <title>Node65&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1229.2024,-792.6333C1032.5993,-791.0305 374.5093,-783.003 287.5,-747 213.0781,-716.2055 153.5,-705.5414 153.5,-625 153.5,-625 153.5,-625 153.5,-384.5 153.5,-198.1075 247.7979,-116.4907 427.5,-67 562.2257,-29.896 1003.5978,-18.5648 1127.1184,-16.0823"/>
-<polygon fill="#191970" stroke="#191970" points="1127.4869,-19.5758 1137.4166,-15.8812 1127.3502,-12.5772 1127.4869,-19.5758"/>
+<path fill="none" stroke="#191970" d="M455.4921,-777.9877C373.0403,-750.1903 152.0178,-660.8604 152.0178,-502 152.0178,-502 152.0178,-502 152.0178,-317.5 152.0178,-129.0879 316.7308,-130.7841 494.0178,-67 556.7691,-44.4234 1027.5159,-22.4815 1155.8103,-16.8732"/>
+<polygon fill="#191970" stroke="#191970" points="1155.963,-20.37 1165.8017,-16.4393 1155.6592,-13.3766 1155.963,-20.37"/>
 </g>
 <!-- Node65&#45;&gt;Node57 -->
-<g id="edge180" class="edge">
+<g id="edge181" class="edge">
 <title>Node65&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M1285.5231,-783.4509C1287.4803,-765.184 1291.7905,-724.9553 1294.3895,-700.6976"/>
-<polygon fill="#191970" stroke="#191970" points="1297.8941,-700.8409 1295.4795,-690.5249 1290.934,-700.0951 1297.8941,-700.8409"/>
+<path fill="none" stroke="#191970" d="M492.4827,-777.8845C505.6764,-758.261 535.887,-713.3274 552.8638,-688.0772"/>
+<polygon fill="#191970" stroke="#191970" points="555.7885,-690 558.4636,-679.7484 549.9794,-686.0942 555.7885,-690"/>
 </g>
 <!-- Node66 -->
 <g id="node44" class="node">
 <title>Node66</title>
 <g id="a_node44"><a xlink:href="with_8h.html" target="_top" xlink:title="RAII wrapper function to enter and exit a context object similar to python&#39;s with syntax...">
-<polygon fill="#ffffff" stroke="#ff0000" points="0,-615.5 0,-634.5 109,-634.5 109,-615.5 0,-615.5"/>
-<text text-anchor="middle" x="54.5" y="-622.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/support/with.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="17.5178,-604.5 17.5178,-623.5 126.5178,-623.5 126.5178,-604.5 17.5178,-604.5"/>
+<text text-anchor="middle" x="72.0178" y="-611.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/support/with.h</text>
 </a>
 </g>
 </g>
 <!-- Node65&#45;&gt;Node66 -->
-<g id="edge171" class="edge">
+<g id="edge172" class="edge">
 <title>Node65&#45;&gt;Node66</title>
-<path fill="none" stroke="#191970" d="M1229.2718,-792.5075C1023.9189,-790.4189 311.3064,-780.7842 215.5,-747 153.7903,-725.2393 96.5096,-670.3919 69.9822,-642.2467"/>
-<polygon fill="#191970" stroke="#191970" points="72.4094,-639.7164 63.0507,-634.75 67.2697,-644.4686 72.4094,-639.7164"/>
+<path fill="none" stroke="#191970" d="M435.762,-777.968C395.4856,-769.4632 337.9782,-755.3553 290.0178,-736 214.8049,-705.6465 133.1468,-654.5367 94.5957,-629.193"/>
+<polygon fill="#191970" stroke="#191970" points="96.4502,-626.2232 86.1806,-623.6184 92.5844,-632.0589 96.4502,-626.2232"/>
 </g>
 <!-- Node67 -->
 <g id="node45" class="node">
 <title>Node67</title>
 <g id="a_node45"><a xlink:href="target__kind_8h.html" target="_top" xlink:title="Target kind registry. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1410.5,-554 1410.5,-573 1546.5,-573 1546.5,-554 1410.5,-554"/>
-<text text-anchor="middle" x="1478.5" y="-561" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/target/target_kind.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="720.0178,-548.5 720.0178,-567.5 856.0178,-567.5 856.0178,-548.5 720.0178,-548.5"/>
+<text text-anchor="middle" x="788.0178" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/target/target_kind.h</text>
 </a>
 </g>
 </g>
 <!-- Node65&#45;&gt;Node67 -->
-<g id="edge173" class="edge">
+<g id="edge174" class="edge">
 <title>Node65&#45;&gt;Node67</title>
-<path fill="none" stroke="#191970" d="M1292.5889,-783.4309C1322.2313,-748.3642 1425.1878,-626.5678 1463.7582,-580.9394"/>
-<polygon fill="#191970" stroke="#191970" points="1466.5938,-583.0065 1470.3765,-573.11 1461.2478,-578.4875 1466.5938,-583.0065"/>
+<path fill="none" stroke="#191970" d="M485.411,-777.7219C484.5005,-754.7054 485.2689,-696.3175 513.0178,-660 540.8839,-623.5291 665.8698,-587.8434 737.377,-569.9221"/>
+<polygon fill="#191970" stroke="#191970" points="738.2322,-573.3161 747.0964,-567.513 736.5481,-566.5217 738.2322,-573.3161"/>
 </g>
 <!-- Node66&#45;&gt;Node18 -->
-<g id="edge172" class="edge">
+<g id="edge173" class="edge">
 <title>Node66&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M52.539,-615.4791C48.4731,-594.8799 39.5,-544.5945 39.5,-502 39.5,-502 39.5,-502 39.5,-133 39.5,-101.7875 39.2296,-86.6257 63.5,-67 89.6545,-45.8508 315.8689,-25.2477 401.6797,-18.1227"/>
-<polygon fill="#191970" stroke="#191970" points="402.2132,-21.5907 411.8929,-17.2834 401.6399,-14.6142 402.2132,-21.5907"/>
+<path fill="none" stroke="#191970" d="M60.1285,-604.2632C39.6784,-586.3405 .0178,-545.9199 .0178,-502 .0178,-502 .0178,-502 .0178,-133 .0178,-101.7875 -.2669,-86.6081 24.0178,-67 50.5221,-45.5997 280.1087,-25.0801 366.4095,-18.0541"/>
+<polygon fill="#191970" stroke="#191970" points="366.7486,-21.5382 376.4352,-17.2463 366.1864,-14.5608 366.7486,-21.5382"/>
 </g>
 <!-- Node67&#45;&gt;Node5 -->
-<g id="edge174" class="edge">
+<g id="edge175" class="edge">
 <title>Node67&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M1546.6447,-557.8519C1690.6757,-545.914 2023.9845,-518.288 2160.5238,-506.9711"/>
-<polygon fill="#191970" stroke="#191970" points="2161.111,-510.4345 2170.7877,-506.1204 2160.5328,-503.4584 2161.111,-510.4345"/>
+<path fill="none" stroke="#191970" d="M856.4784,-555.3741C1104.6508,-545.8552 1954.7262,-513.2495 2188.2468,-504.2926"/>
+<polygon fill="#191970" stroke="#191970" points="2188.3902,-507.7898 2198.2487,-503.909 2188.1218,-500.7949 2188.3902,-507.7898"/>
 </g>
 <!-- Node67&#45;&gt;Node23 -->
-<g id="edge175" class="edge">
+<g id="edge176" class="edge">
 <title>Node67&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1480.5871,-553.9837C1489.3991,-516.0739 1527.1585,-374.3674 1613.5,-302 1654.8713,-267.3245 1679.8517,-287.1901 1729.5,-266 1771.7161,-247.982 1817.7723,-220.8592 1844.8444,-204.0776"/>
-<polygon fill="#191970" stroke="#191970" points="1846.9701,-206.8763 1853.591,-198.605 1843.2571,-200.9421 1846.9701,-206.8763"/>
+<path fill="none" stroke="#191970" d="M784.0809,-548.1843C770.2703,-512.0351 727.5688,-384.1709 779.0178,-302 815.821,-243.2203 1030.1566,-206.9596 1119.7148,-194.1884"/>
+<polygon fill="#191970" stroke="#191970" points="1120.3298,-197.6365 1129.747,-192.782 1119.3579,-190.7043 1120.3298,-197.6365"/>
 </g>
 <!-- Node67&#45;&gt;Node18 -->
-<g id="edge177" class="edge">
+<g id="edge178" class="edge">
 <title>Node67&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1410.3197,-559.2492C1190.4023,-545.1705 508.8763,-498.5382 419.5,-456 268.251,-384.0138 153.5,-356.506 153.5,-189 153.5,-189 153.5,-189 153.5,-133 153.5,-101.7875 153.8275,-87.343 177.5,-67 210.7901,-38.3921 339.8532,-23.6549 401.4387,-18.129"/>
-<polygon fill="#191970" stroke="#191970" points="402.1069,-21.5842 411.7667,-17.2322 401.5014,-14.6105 402.1069,-21.5842"/>
+<path fill="none" stroke="#191970" d="M719.6864,-549.6759C602.2034,-534.4054 367.8177,-499.4571 298.0178,-456 231.753,-414.7439 190.0178,-395.5582 190.0178,-317.5 190.0178,-317.5 190.0178,-317.5 190.0178,-133 190.0178,-53.2548 307.0618,-27.0611 365.9714,-18.9278"/>
+<polygon fill="#191970" stroke="#191970" points="366.7792,-22.353 376.2533,-17.6105 365.8895,-15.4097 366.7792,-22.353"/>
 </g>
 <!-- Node67&#45;&gt;Node20 -->
-<g id="edge178" class="edge">
+<g id="edge179" class="edge">
 <title>Node67&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1458.112,-553.9992C1427.4425,-539.8349 1367.4848,-512.6379 1315.5,-492 1119.9077,-414.3503 1021.2964,-478.7521 869.5,-333 835.4927,-300.3468 834.3891,-240.0428 836.4347,-208.8483"/>
-<polygon fill="#191970" stroke="#191970" points="839.9312,-209.0343 837.2757,-198.7777 832.9555,-208.4517 839.9312,-209.0343"/>
+<path fill="none" stroke="#191970" d="M779.2551,-548.4888C771.5569,-539.6515 760.5875,-525.8104 754.0178,-512 700.4078,-399.3044 642.8784,-341.448 708.0178,-235 720.3194,-214.8973 744.4856,-203.2265 764.681,-196.6735"/>
+<polygon fill="#191970" stroke="#191970" points="765.901,-199.9628 774.5007,-193.7744 763.9189,-193.2492 765.901,-199.9628"/>
 </g>
 <!-- Node67&#45;&gt;Node36 -->
-<g id="edge176" class="edge">
+<g id="edge177" class="edge">
 <title>Node67&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M1484.6814,-553.8825C1509.8133,-515.5874 1608.669,-372.6135 1727.5,-302 1780.5137,-270.4974 1806.1587,-293.2066 1861.5,-266 1893.905,-250.0692 1926.1825,-223.01 1945.2345,-205.5796"/>
-<polygon fill="#191970" stroke="#191970" points="1947.8553,-207.9214 1952.7813,-198.5413 1943.081,-202.8021 1947.8553,-207.9214"/>
+<path fill="none" stroke="#191970" d="M812.8973,-548.4684C910.5119,-511.4135 1277.3309,-375.3766 1590.0178,-302 1692.8863,-277.8604 1728.0697,-308.0203 1825.0178,-266 1842.2572,-258.5279 1881.2089,-225.756 1904.4793,-205.4503"/>
+<polygon fill="#191970" stroke="#191970" points="1907.0708,-207.8324 1912.2738,-198.6032 1902.451,-202.5734 1907.0708,-207.8324"/>
 </g>
 <!-- Node76&#45;&gt;Node18 -->
-<g id="edge217" class="edge">
+<g id="edge218" class="edge">
 <title>Node76&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1694.9207,-791.4859C1492.3278,-784.4479 672.5509,-752.6321 423.5,-691 203.8438,-636.6422 77.5,-543.7821 77.5,-317.5 77.5,-317.5 77.5,-317.5 77.5,-133 77.5,-95.9295 96.7009,-86.0536 128.5,-67 174.2291,-39.5996 332.1408,-23.7852 401.4486,-18.0191"/>
-<polygon fill="#191970" stroke="#191970" points="402.0968,-21.4781 411.7805,-17.1797 401.5299,-14.501 402.0968,-21.4781"/>
+<path fill="none" stroke="#191970" d="M1858.4504,-784.8317C1800.6714,-781.5096 1697.9767,-775.8091 1610.0178,-772 1492.0606,-766.8918 660.541,-771.7566 548.0178,-736 423.3808,-696.394 76.0178,-515.2785 76.0178,-384.5 76.0178,-384.5 76.0178,-384.5 76.0178,-133 76.0178,-86.2171 115.7733,-87.1008 158.0178,-67 227.5189,-33.93 318.3485,-21.9064 366.2109,-17.6803"/>
+<polygon fill="#191970" stroke="#191970" points="366.6262,-21.158 376.3053,-16.8495 366.0519,-14.1816 366.6262,-21.158"/>
 </g>
 <!-- Node76&#45;&gt;Node20 -->
-<g id="edge218" class="edge">
+<g id="edge219" class="edge">
 <title>Node76&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M1694.7741,-790.6915C1519.0982,-781.4498 893.8691,-744.9941 824.5,-691 705.4949,-598.3712 673.5222,-513.2507 717.5,-369 738.0761,-301.509 793.3586,-236.3811 821.5157,-206.3167"/>
-<polygon fill="#191970" stroke="#191970" points="824.3203,-208.4471 828.6868,-198.7939 819.2535,-203.6172 824.3203,-208.4471"/>
+<path fill="none" stroke="#191970" d="M1858.3198,-785.076C1671.4065,-775.1555 973.8748,-738.076 951.0178,-736 755.571,-718.2487 642.6906,-827.3076 513.0178,-680 444.0648,-601.6698 585.8913,-305.2968 663.0178,-235 691.2559,-209.2625 734.5006,-197.8393 764.4016,-192.8253"/>
+<polygon fill="#191970" stroke="#191970" points="765.0162,-196.2721 774.375,-191.3061 763.962,-189.3519 765.0162,-196.2721"/>
 </g>
 <!-- Node76&#45;&gt;Node16 -->
-<g id="edge215" class="edge">
+<g id="edge216" class="edge">
 <title>Node76&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1773.1552,-783.4936C1811.6523,-770.1644 1872.8814,-741.6787 1892.5,-691 1933.495,-585.1018 1837.2992,-542.5754 1876.5,-436 1906.0123,-355.7649 1945.2805,-352.1727 2014.5,-302 2038.7602,-284.4153 2057.0368,-292.241 2071.5,-266 2090.5176,-231.4959 2083.9009,-205.2256 2054.5,-179 1922.5308,-61.2835 1336.698,-24.4077 1191.9304,-17.0136"/>
-<polygon fill="#191970" stroke="#191970" points="1191.6918,-13.4975 1181.5298,-16.4949 1191.343,-20.4888 1191.6918,-13.4975"/>
+<path fill="none" stroke="#191970" d="M1911.2264,-777.7083C1918.7685,-767.4077 1930.8969,-750.6852 1941.0178,-736 1957.946,-711.4376 1962.274,-705.3379 1978.0178,-680 2013.8632,-622.3108 2017.1058,-604.3526 2055.0178,-548 2101.3644,-479.1101 2142.2563,-478.598 2169.0178,-400 2195.2118,-323.0688 2193.2275,-252.9032 2109.0178,-179 1972.0787,-58.821 1367.6295,-23.7064 1220.4227,-16.8662"/>
+<polygon fill="#191970" stroke="#191970" points="1220.3929,-13.3614 1210.2447,-16.4049 1220.0759,-20.3542 1220.3929,-13.3614"/>
 </g>
 <!-- Node76&#45;&gt;Node17 -->
-<g id="edge216" class="edge">
+<g id="edge217" class="edge">
 <title>Node76&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M1786.3792,-790.1878C2042.9962,-774.4418 3290.3937,-697.681 3329.5,-691 3515.4313,-659.2352 3709.5,-690.6252 3709.5,-502 3709.5,-502 3709.5,-502 3709.5,-250.5 3709.5,-78.4691 3439.1824,-31.3362 3330.2239,-19.3255"/>
-<polygon fill="#191970" stroke="#191970" points="3330.5093,-15.8363 3320.1977,-18.2698 3329.7762,-22.7978 3330.5093,-15.8363"/>
+<path fill="none" stroke="#191970" d="M1949.6135,-785.9244C2106.4119,-779.9485 2637.5323,-755.0825 3069.0178,-680 3167.1474,-662.9245 3193.3826,-660.602 3286.0178,-624 3309.6076,-614.6792 3724.0178,-388.207 3724.0178,-384.5 3724.0178,-384.5 3724.0178,-384.5 3724.0178,-250.5 3724.0178,-129.7467 3301.4943,-47.0098 3160.9043,-22.8223"/>
+<polygon fill="#191970" stroke="#191970" points="3161.336,-19.3455 3150.8903,-21.1168 3160.1606,-26.2462 3161.336,-19.3455"/>
 </g>
 <!-- Node77 -->
 <g id="node48" class="node">
 <title>Node77</title>
 <g id="a_node48"><a xlink:href="bound_8h.html" target="_top" xlink:title="Bound deducers. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1728.5,-727.5 1728.5,-746.5 1832.5,-746.5 1832.5,-727.5 1728.5,-727.5"/>
-<text text-anchor="middle" x="1780.5" y="-734.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/arith/bound.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1812.0178,-716.5 1812.0178,-735.5 1916.0178,-735.5 1916.0178,-716.5 1812.0178,-716.5"/>
+<text text-anchor="middle" x="1864.0178" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/arith/bound.h</text>
 </a>
 </g>
 </g>
 <!-- Node76&#45;&gt;Node77 -->
-<g id="edge191" class="edge">
+<g id="edge192" class="edge">
 <title>Node76&#45;&gt;Node77</title>
-<path fill="none" stroke="#191970" d="M1747.4675,-783.2455C1753.0311,-775.4564 1760.978,-764.3308 1767.6829,-754.9439"/>
-<polygon fill="#191970" stroke="#191970" points="1770.648,-756.8144 1773.6124,-746.6427 1764.9519,-752.7457 1770.648,-756.8144"/>
+<path fill="none" stroke="#191970" d="M1897.7678,-777.8906C1891.9313,-768.917 1883.0669,-755.2879 1875.8872,-744.2491"/>
+<polygon fill="#191970" stroke="#191970" points="1878.7422,-742.2194 1870.3559,-735.7449 1872.8742,-746.0361 1878.7422,-742.2194"/>
 </g>
 <!-- Node63 -->
 <g id="node49" class="node">
 <title>Node63</title>
 <g id="a_node49"><a xlink:href="tir_2expr_8h.html" target="_top" xlink:title="TIR expressions. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1664,-671.5 1664,-690.5 1747,-690.5 1747,-671.5 1664,-671.5"/>
-<text text-anchor="middle" x="1705.5" y="-678.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/expr.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1886.5178,-660.5 1886.5178,-679.5 1969.5178,-679.5 1969.5178,-660.5 1886.5178,-660.5"/>
+<text text-anchor="middle" x="1928.0178" y="-667.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node76&#45;&gt;Node63 -->
-<g id="edge207" class="edge">
+<g id="edge208" class="edge">
 <title>Node76&#45;&gt;Node63</title>
-<path fill="none" stroke="#191970" d="M1735.3543,-783.3801C1730.65,-774.2423 1723.7904,-760.0058 1719.5,-747 1714.4544,-731.705 1710.7901,-713.8 1708.4788,-700.5012"/>
-<polygon fill="#191970" stroke="#191970" points="1711.9231,-699.876 1706.855,-690.572 1705.0149,-701.0058 1711.9231,-699.876"/>
+<path fill="none" stroke="#191970" d="M1909.0747,-777.948C1914.1429,-767.8429 1921.6606,-751.2778 1925.0178,-736 1928.3688,-720.7501 1929.0044,-703.0569 1928.8648,-689.8306"/>
+<polygon fill="#191970" stroke="#191970" points="1932.3556,-689.4347 1928.5913,-679.5311 1925.358,-689.6205 1932.3556,-689.4347"/>
 </g>
 <!-- Node79 -->
 <g id="node50" class="node">
 <title>Node79</title>
 <g id="a_node50"><a xlink:href="tir_2op_8h.html" target="_top" xlink:title="Common operators defined for Expr. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2186.5,-727.5 2186.5,-746.5 2260.5,-746.5 2260.5,-727.5 2186.5,-727.5"/>
-<text text-anchor="middle" x="2223.5" y="-734.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/op.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2204.0178,-716.5 2204.0178,-735.5 2278.0178,-735.5 2278.0178,-716.5 2204.0178,-716.5"/>
+<text text-anchor="middle" x="2241.0178" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/op.h</text>
 </a>
 </g>
 </g>
 <!-- Node76&#45;&gt;Node79 -->
-<g id="edge208" class="edge">
+<g id="edge209" class="edge">
 <title>Node76&#45;&gt;Node79</title>
-<path fill="none" stroke="#191970" d="M1786.3749,-787.6812C1878.1842,-777.0366 2083.6455,-753.215 2176.4309,-742.4573"/>
-<polygon fill="#191970" stroke="#191970" points="2176.8485,-745.9324 2186.3788,-741.3039 2176.0422,-738.979 2176.8485,-745.9324"/>
+<path fill="none" stroke="#191970" d="M1949.6284,-779.511C2006.1068,-769.5668 2105.2323,-751.945 2190.0178,-736 2191.2897,-735.7608 2192.5792,-735.517 2193.8803,-735.2699"/>
+<polygon fill="#191970" stroke="#191970" points="2194.8221,-738.6531 2203.9834,-733.3314 2193.503,-731.7785 2194.8221,-738.6531"/>
 </g>
 <!-- Node77&#45;&gt;Node3 -->
-<g id="edge192" class="edge">
+<g id="edge193" class="edge">
 <title>Node77&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1832.6251,-731.3885C1886.4442,-724.792 1972.478,-711.9952 2044.5,-691 2066.6876,-684.5321 2070.5093,-678.1087 2092.5,-671 2141.4514,-655.176 2198.905,-642.3419 2238.9022,-634.2905"/>
-<polygon fill="#191970" stroke="#191970" points="2239.8071,-637.6792 2248.9334,-632.2978 2238.4431,-630.8134 2239.8071,-637.6792"/>
+<path fill="none" stroke="#191970" d="M1916.2508,-720.0931C1979.1142,-712.3478 2080.4184,-697.6497 2114.0178,-680 2135.8434,-668.535 2155.0378,-647.3866 2167.0198,-632.1139"/>
+<polygon fill="#191970" stroke="#191970" points="2169.9751,-634.0081 2173.2065,-623.9181 2164.3882,-629.7907 2169.9751,-634.0081"/>
 </g>
 <!-- Node77&#45;&gt;Node36 -->
-<g id="edge206" class="edge">
+<g id="edge207" class="edge">
 <title>Node77&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M1780.2022,-727.329C1779.2294,-683.5048 1779.2302,-500.0191 1841.5,-369 1867.0995,-315.1371 1895.7632,-316.4908 1927.5,-266 1939.2309,-247.337 1949.239,-224.0847 1955.511,-208.0427"/>
-<polygon fill="#191970" stroke="#191970" points="1958.7948,-209.254 1959.0736,-198.6628 1952.251,-206.7684 1958.7948,-209.254"/>
+<path fill="none" stroke="#191970" d="M1811.9664,-716.7757C1786.6567,-710.0969 1757.2073,-698.8137 1736.0178,-680 1670.8231,-622.1153 1654.4116,-575.3385 1680.0178,-492 1690.0633,-459.3055 1757.5806,-345.8195 1812.0178,-302 1844.2957,-276.0177 1869.9514,-296.483 1898.0178,-266 1912.6214,-250.1389 1918.7001,-225.6389 1921.2268,-208.548"/>
+<polygon fill="#191970" stroke="#191970" points="1924.7098,-208.8983 1922.4282,-198.5521 1917.7598,-208.0629 1924.7098,-208.8983"/>
 </g>
 <!-- Node77&#45;&gt;Node63 -->
-<g id="edge193" class="edge">
+<g id="edge194" class="edge">
 <title>Node77&#45;&gt;Node63</title>
-<path fill="none" stroke="#191970" d="M1767.436,-727.2455C1756.1438,-718.814 1739.6155,-706.4729 1726.4729,-696.6598"/>
-<polygon fill="#191970" stroke="#191970" points="1728.5211,-693.8211 1718.4143,-690.6427 1724.3331,-699.4301 1728.5211,-693.8211"/>
+<path fill="none" stroke="#191970" d="M1875.1657,-716.2455C1884.5265,-708.0549 1898.1036,-696.1749 1909.1527,-686.5069"/>
+<polygon fill="#191970" stroke="#191970" points="1911.7766,-688.8618 1916.9976,-679.6427 1907.167,-683.5937 1911.7766,-688.8618"/>
 </g>
 <!-- Node63&#45;&gt;Node3 -->
-<g id="edge194" class="edge">
+<g id="edge195" class="edge">
 <title>Node63&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1747.1089,-676.521C1763.7136,-674.7594 1783.0076,-672.745 1800.5,-671 1960.7577,-655.0127 2150.7752,-637.5229 2238.9634,-629.4898"/>
-<polygon fill="#191970" stroke="#191970" points="2239.3099,-632.9728 2248.9515,-628.5807 2238.6754,-626.0016 2239.3099,-632.9728"/>
+<path fill="none" stroke="#191970" d="M1969.6305,-660.7527C2013.7814,-650.9414 2083.5162,-635.4448 2130.4726,-625.01"/>
+<polygon fill="#191970" stroke="#191970" points="2131.4194,-628.3851 2140.4219,-622.7991 2129.9008,-621.5518 2131.4194,-628.3851"/>
 </g>
 <!-- Node63&#45;&gt;Node5 -->
-<g id="edge195" class="edge">
+<g id="edge196" class="edge">
 <title>Node63&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M1742.5358,-671.443C1773.0558,-663.1235 1817.2228,-650.0563 1854.5,-635 1933.9327,-602.9171 1946.0428,-577.4194 2026.5,-548 2070.3685,-531.9594 2121.8924,-520.0481 2160.8124,-512.3699"/>
-<polygon fill="#191970" stroke="#191970" points="2161.7765,-515.7483 2170.9298,-510.4129 2160.4471,-508.8757 2161.7765,-515.7483"/>
+<path fill="none" stroke="#191970" d="M1937.7546,-660.3744C1966.2537,-632.2373 2047.9397,-551.8635 2055.0178,-548 2096.3208,-525.4549 2148.5012,-513.8723 2188.1887,-507.9723"/>
+<polygon fill="#191970" stroke="#191970" points="2188.8383,-511.4155 2198.2536,-506.5573 2187.8636,-504.4837 2188.8383,-511.4155"/>
 </g>
 <!-- Node63&#45;&gt;Node21 -->
-<g id="edge197" class="edge">
+<g id="edge198" class="edge">
 <title>Node63&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1663.8242,-672.8725C1602.9813,-659.7523 1487.7119,-630.0737 1401.5,-579 1286.9739,-511.1523 1179.5032,-390.7841 1137.3154,-340.5366"/>
-<polygon fill="#191970" stroke="#191970" points="1139.7487,-337.9891 1130.6603,-332.5438 1134.3693,-342.4682 1139.7487,-337.9891"/>
+<path fill="none" stroke="#191970" d="M1886.406,-663.1284C1842.0208,-655.3662 1769.8771,-641.5034 1709.0178,-624 1394.0144,-533.4035 1327.4279,-474.2635 1017.0178,-369 982.4652,-357.2828 943.52,-345.1452 912.0399,-335.5983"/>
+<polygon fill="#191970" stroke="#191970" points="912.6814,-332.1359 902.0964,-332.5935 910.6564,-338.8366 912.6814,-332.1359"/>
 </g>
 <!-- Node63&#45;&gt;Node22 -->
-<g id="edge201" class="edge">
+<g id="edge202" class="edge">
 <title>Node63&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1704.9614,-671.1481C1703.522,-635.9921 1702.7404,-514.6198 1754.5,-436 1797.7613,-370.2886 1985.5152,-270.8784 2017.5,-199 2021.1138,-190.8789 2023.3211,-185.7177 2017.5,-179 1999.5455,-158.28 1822.5805,-142.3916 1739.7953,-136.0299"/>
-<polygon fill="#191970" stroke="#191970" points="1739.9456,-132.5314 1729.7099,-135.2663 1739.417,-139.5114 1739.9456,-132.5314"/>
+<path fill="none" stroke="#191970" d="M1923.8482,-660.2633C1905.3263,-615.3896 1834.5805,-425.4506 1908.0178,-302 1921.1122,-279.9878 1934.4842,-283.6779 1953.0178,-266 1981.7291,-238.6142 1998.0034,-236.1203 2012.0178,-199 2015.1574,-190.684 2017.7673,-185.779 2012.0178,-179 1996.957,-161.2423 1850.5394,-144.4554 1776.4746,-137.0168"/>
+<polygon fill="#191970" stroke="#191970" points="1776.3927,-133.4916 1766.0962,-135.9881 1775.7022,-140.4575 1776.3927,-133.4916"/>
 </g>
 <!-- Node63&#45;&gt;Node18 -->
-<g id="edge205" class="edge">
+<g id="edge206" class="edge">
 <title>Node63&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M1663.6855,-679.2721C1464.5241,-670.2357 613.7359,-622.2908 419.5,-456 306.7047,-359.4328 379.3114,-270.8988 392.5,-123 394.7278,-98.0166 390.6492,-90.4691 399.5,-67 404.1145,-54.7641 412.1831,-42.5926 419.411,-33.1568"/>
-<polygon fill="#191970" stroke="#191970" points="422.1579,-35.3259 425.6894,-25.3369 416.6995,-30.9434 422.1579,-35.3259"/>
+<path fill="none" stroke="#191970" d="M1886.1905,-665.6527C1868.2819,-663.834 1847.131,-661.7426 1828.0178,-660 1189.9457,-601.824 1008.0904,-694.8534 394.0178,-512 262.9656,-472.9764 114.0178,-521.2389 114.0178,-384.5 114.0178,-384.5 114.0178,-384.5 114.0178,-133 114.0178,-78.9457 291.4627,-37.0383 366.3236,-21.7636"/>
+<polygon fill="#191970" stroke="#191970" points="367.0608,-25.1855 376.1768,-19.7865 365.6836,-18.3223 367.0608,-25.1855"/>
 </g>
 <!-- Node63&#45;&gt;Node11 -->
-<g id="edge196" class="edge">
+<g id="edge197" class="edge">
 <title>Node63&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M1747.0556,-675.8673C1763.6529,-674.0158 1782.9559,-672.1104 1800.5,-671 1893.1694,-665.1347 3383.6038,-664.9382 3471.5,-635 3639.6295,-577.7337 3747.5,-260.6308 3747.5,-250.5 3747.5,-250.5 3747.5,-250.5 3747.5,-133 3747.5,-85.463 3699.4981,-53.2195 3659.9113,-34.7112"/>
-<polygon fill="#191970" stroke="#191970" points="3661.1382,-31.4255 3650.5818,-30.5239 3658.2719,-37.8118 3661.1382,-31.4255"/>
+<path fill="none" stroke="#191970" d="M1969.6966,-669.6837C2175.7555,-667.9605 3080.5577,-658.474 3201.0178,-624 3364.0934,-577.33 3436.2921,-552.7621 3510.0178,-400 3574.6605,-266.0581 3629.6814,-187.8658 3543.0178,-67 3533.2649,-53.3981 3499.1893,-41.2205 3465.7683,-32.126"/>
+<polygon fill="#191970" stroke="#191970" points="3466.4764,-28.6931 3455.9142,-29.5245 3464.6895,-35.4612 3466.4764,-28.6931"/>
 </g>
 <!-- Node63&#45;&gt;Node16 -->
-<g id="edge203" class="edge">
+<g id="edge204" class="edge">
 <title>Node63&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1711.6592,-671.2167C1742.7221,-622.7228 1887.6908,-406.2993 2064.5,-302 2119.4653,-269.5761 2164.4396,-316.4656 2203.5,-266 2232.1036,-229.0445 2204.0556,-130.1988 2095.5,-67 2055.6285,-43.7877 1351.8192,-21.285 1191.7216,-16.4519"/>
-<polygon fill="#191970" stroke="#191970" points="1191.6623,-12.9486 1181.5618,-16.1469 1191.4522,-19.9455 1191.6623,-12.9486"/>
+<path fill="none" stroke="#191970" d="M1934.0392,-660.1937C1942.3609,-646.9314 1958.1543,-622.7777 1974.0178,-604 1996.9363,-576.8711 2003.2723,-570.1681 2031.0178,-548 2090.0593,-500.8269 2119.0761,-507.8905 2174.0178,-456 2202.5841,-429.0201 2226.0178,-423.7931 2226.0178,-384.5 2226.0178,-384.5 2226.0178,-384.5 2226.0178,-317.5 2226.0178,-230.7989 2299.0768,-187.3913 2241.0178,-123 2170.8428,-45.1712 1389.6822,-20.6895 1220.2949,-16.2786"/>
+<polygon fill="#191970" stroke="#191970" points="1220.34,-12.7787 1210.2538,-16.0222 1220.1612,-19.7764 1220.34,-12.7787"/>
 </g>
 <!-- Node63&#45;&gt;Node28 -->
-<g id="edge200" class="edge">
+<g id="edge201" class="edge">
 <title>Node63&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M1747.3398,-674.6098C1837.3195,-661.1384 2056.0301,-629.8468 2240.5,-615 2293.1454,-610.7629 3153.8676,-617.0468 3190.5,-579 3256.6714,-510.2736 3154.3805,-392.1076 3040.5,-302 3016.6562,-283.1336 2985.1926,-270.683 2958.3306,-262.7583"/>
-<polygon fill="#191970" stroke="#191970" points="2959.2391,-259.378 2948.6649,-260.0399 2957.3439,-266.1166 2959.2391,-259.378"/>
+<path fill="none" stroke="#191970" d="M1952.0803,-660.4755C1989.8028,-645.9439 2065.4391,-618.4092 2132.0178,-604 2485.2708,-527.5474 2588.3681,-595.5175 2940.0178,-512 2975.0408,-503.682 3071.4285,-485.5277 3092.0178,-456 3147.6468,-376.2208 3000.2907,-295.8711 2932.7611,-264.3785"/>
+<polygon fill="#191970" stroke="#191970" points="2933.9595,-261.0777 2923.4106,-260.0924 2931.0427,-267.441 2933.9595,-261.0777"/>
 </g>
 <!-- Node63&#45;&gt;Node33 -->
-<g id="edge199" class="edge">
+<g id="edge200" class="edge">
 <title>Node63&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M1682.3576,-671.4148C1666.9382,-663.8153 1647.5451,-651.6879 1636.5,-635 1561.1115,-521.0967 1592.222,-468.9983 1579.5,-333 1578.2168,-319.2821 1576.4131,-315.4275 1579.5,-302 1581.6679,-292.5697 1585.8028,-282.8635 1590.0949,-274.4553"/>
-<polygon fill="#191970" stroke="#191970" points="1593.2092,-276.0547 1594.9113,-265.5975 1587.0595,-272.7108 1593.2092,-276.0547"/>
+<path fill="none" stroke="#191970" d="M1886.3485,-662.8984C1858.0841,-656.4549 1821.0828,-644.6878 1794.0178,-624 1698.4315,-550.9362 1677.354,-515.0061 1642.0178,-400 1627.44,-352.5547 1665.7654,-301.8179 1692.8467,-273.3369"/>
+<polygon fill="#191970" stroke="#191970" points="1695.625,-275.5029 1700.1335,-265.9152 1690.6301,-270.5987 1695.625,-275.5029"/>
 </g>
 <!-- Node63&#45;&gt;Node36 -->
-<g id="edge204" class="edge">
+<g id="edge205" class="edge">
 <title>Node63&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M1706.5813,-671.3872C1711.4058,-631.558 1733.6105,-476.5396 1797.5,-369 1830.5666,-313.342 1857.2476,-314.171 1900.5,-266 1917.9452,-246.5709 1936.553,-223.0394 1948.7799,-207.1439"/>
-<polygon fill="#191970" stroke="#191970" points="1951.8977,-208.828 1955.1847,-198.7559 1946.3341,-204.5798 1951.8977,-208.828"/>
+<path fill="none" stroke="#191970" d="M1915.1737,-660.2493C1894.3102,-643.6846 1853.256,-607.9352 1832.0178,-568 1796.0727,-500.4108 1802.5147,-476.0799 1794.0178,-400 1792.4885,-386.3074 1788.9928,-381.8287 1794.0178,-369 1808.8148,-331.2234 1823.0804,-325.6886 1856.0178,-302 1887.6713,-279.2348 1914.8475,-298.7417 1936.0178,-266 1947.2344,-248.6525 1940.4928,-224.6764 1933.1971,-208.1084"/>
+<polygon fill="#191970" stroke="#191970" points="1936.2388,-206.3584 1928.7385,-198.8753 1929.9353,-209.4024 1936.2388,-206.3584"/>
 </g>
 <!-- Node63&#45;&gt;Node42 -->
-<g id="edge198" class="edge">
+<g id="edge199" class="edge">
 <title>Node63&#45;&gt;Node42</title>
-<path fill="none" stroke="#191970" d="M1663.6741,-676.4253C1529.4037,-660.8942 1112.7161,-605.862 1021.5,-512 976.3839,-465.5752 972.0459,-383.6782 973.0251,-342.5498"/>
-<polygon fill="#191970" stroke="#191970" points="976.5227,-342.676 973.3897,-332.555 969.5274,-342.4208 976.5227,-342.676"/>
+<path fill="none" stroke="#191970" d="M1886.5009,-661.7195C1851.1191,-654.0006 1799.2775,-641.1099 1756.0178,-624 1540.2451,-538.6587 1301.5479,-391.9012 1216.8995,-338.0689"/>
+<polygon fill="#191970" stroke="#191970" points="1218.5814,-334.9903 1208.2686,-332.562 1214.8162,-340.8914 1218.5814,-334.9903"/>
 </g>
 <!-- Node63&#45;&gt;Node45 -->
-<g id="edge202" class="edge">
+<g id="edge203" class="edge">
 <title>Node63&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M1747.0795,-676.1973C1763.6802,-674.3912 1782.9791,-672.4308 1800.5,-671 2151.4811,-642.3376 3062.1623,-725.2635 3382.5,-579 3425.9575,-559.1577 3457.5,-549.7731 3457.5,-502 3457.5,-502 3457.5,-502 3457.5,-446 3457.5,-404.1565 3480.8108,-359.5457 3495.5992,-335.4548"/>
-<polygon fill="#191970" stroke="#191970" points="3498.5613,-337.3192 3500.9634,-327.0002 3492.6506,-333.569 3498.5613,-337.3192"/>
+<path fill="none" stroke="#191970" d="M1969.7035,-669.9087C2172.7382,-669.2689 3052.9834,-664.1818 3168.0178,-624 3296.0215,-579.288 3350.7832,-397.1946 3365.631,-336.9538"/>
+<polygon fill="#191970" stroke="#191970" points="3369.0459,-337.7219 3367.9476,-327.1842 3362.2348,-336.1068 3369.0459,-337.7219"/>
 </g>
 <!-- Node79&#45;&gt;Node3 -->
-<g id="edge209" class="edge">
+<g id="edge210" class="edge">
 <title>Node79&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M2229.0419,-727.4509C2239.8351,-708.8533 2263.8395,-667.4919 2277.8235,-643.3964"/>
-<polygon fill="#191970" stroke="#191970" points="2280.9798,-644.9307 2282.9722,-634.5249 2274.9255,-641.417 2280.9798,-644.9307"/>
+<path fill="none" stroke="#191970" d="M2235.817,-716.4509C2225.6879,-697.8533 2203.1607,-656.4919 2190.0373,-632.3964"/>
+<polygon fill="#191970" stroke="#191970" points="2193.0622,-630.6328 2185.2055,-623.5249 2186.9148,-633.9809 2193.0622,-630.6328"/>
 </g>
 <!-- Node79&#45;&gt;Node49 -->
-<g id="edge210" class="edge">
+<g id="edge211" class="edge">
 <title>Node79&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M2245.4106,-727.4359C2286.4965,-708.1121 2368.308,-662.2212 2337.5,-615 2322.6319,-592.2107 2295.1681,-579.4376 2270.5842,-572.3138"/>
-<polygon fill="#191970" stroke="#191970" points="2271.1899,-568.8537 2260.6273,-569.6797 2269.3995,-575.6209 2271.1899,-568.8537"/>
+<path fill="none" stroke="#191970" d="M2243.2172,-716.2124C2247.6579,-693.7518 2255.182,-637.8441 2229.0178,-604 2219.2548,-591.3713 2183.6676,-578.9853 2152.9491,-570.2531"/>
+<polygon fill="#191970" stroke="#191970" points="2153.7569,-566.8451 2143.1856,-567.5506 2151.8896,-573.5915 2153.7569,-566.8451"/>
 </g>
 <!-- Node79&#45;&gt;Node22 -->
-<g id="edge212" class="edge">
+<g id="edge213" class="edge">
 <title>Node79&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M2203.6188,-727.3577C2152.3583,-701.2265 2016.182,-623.4471 1965.5,-512 1902.9686,-374.4965 2132.8037,-292.8251 2033.5,-179 2014.4341,-157.146 1825.68,-141.6831 1739.735,-135.719"/>
-<polygon fill="#191970" stroke="#191970" points="1739.7525,-132.2122 1729.5372,-135.0226 1739.2755,-139.1959 1739.7525,-132.2122"/>
+<path fill="none" stroke="#191970" d="M2203.9868,-724.0656C2094.6462,-718.1543 1778.2077,-699.5381 1736.0178,-680 1703.7918,-665.0762 1696.652,-654.8264 1679.0178,-624 1637.4139,-551.272 1643.9458,-503.9131 1693.0178,-436 1789.5026,-302.4701 1908.0648,-348.1514 1978.0178,-199 1981.7922,-190.9523 1983.6912,-185.8429 1978.0178,-179 1952.8674,-148.6651 1839.197,-138.1971 1776.2665,-134.6912"/>
+<polygon fill="#191970" stroke="#191970" points="1776.4212,-131.1946 1766.2523,-134.1682 1776.0561,-138.185 1776.4212,-131.1946"/>
 </g>
 <!-- Node79&#45;&gt;Node17 -->
-<g id="edge214" class="edge">
+<g id="edge215" class="edge">
 <title>Node79&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M2260.5984,-736.5268C2460.6851,-733.6513 3405.7954,-715.8417 3504.5,-635 3642.2626,-522.1688 3616.98,-388.957 3527.5,-235 3477.2014,-148.4575 3458.0965,-126.3612 3377.5,-67 3357.7955,-52.4872 3333.58,-39.0867 3314.9394,-29.595"/>
-<polygon fill="#191970" stroke="#191970" points="3316.2628,-26.3436 3305.7532,-25.0025 3313.1326,-32.6048 3316.2628,-26.3436"/>
+<path fill="none" stroke="#191970" d="M2278.2731,-724.6157C2410.9135,-719.5517 2859.8169,-701.2184 3002.0178,-680 3107.3561,-664.282 3132.9684,-654.4816 3235.0178,-624 3341.6683,-592.1441 3390.1476,-599.4416 3459.0178,-512 3521.5269,-432.6348 3591.5852,-392.1656 3546.0178,-302 3465.5,-142.6772 3250.3309,-57.9306 3158.5037,-28.0901"/>
+<polygon fill="#191970" stroke="#191970" points="3159.5604,-24.7535 3148.9696,-25.0459 3157.4312,-31.4218 3159.5604,-24.7535"/>
 </g>
 <!-- Node79&#45;&gt;Node45 -->
-<g id="edge213" class="edge">
+<g id="edge214" class="edge">
 <title>Node79&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M2260.5296,-735.9968C2394.3874,-731.8027 2865.2526,-712.1123 3244.5,-635 3368.2173,-609.8446 3495.5,-628.2488 3495.5,-502 3495.5,-502 3495.5,-502 3495.5,-446 3495.5,-407.3337 3500.953,-362.4053 3504.5072,-337.2279"/>
-<polygon fill="#191970" stroke="#191970" points="3507.9981,-337.544 3505.9802,-327.1431 3501.0716,-336.5323 3507.9981,-337.544"/>
+<path fill="none" stroke="#191970" d="M2278.1784,-724.733C2433.1942,-719.0404 3028.2555,-693.0258 3201.0178,-624 3315.5969,-578.2208 3337.4751,-520.2726 3365.0178,-400 3369.8064,-379.0892 3370.7072,-354.466 3370.628,-337.5977"/>
+<polygon fill="#191970" stroke="#191970" points="3374.1225,-337.2383 3370.4523,-327.2995 3367.1235,-337.3578 3374.1225,-337.2383"/>
 </g>
 <!-- Node79&#45;&gt;Node63 -->
-<g id="edge211" class="edge">
+<g id="edge212" class="edge">
 <title>Node79&#45;&gt;Node63</title>
-<path fill="none" stroke="#191970" d="M2186.2467,-732.9726C2095.2111,-723.1309 1861.3009,-697.8433 1757.0151,-686.5692"/>
-<polygon fill="#191970" stroke="#191970" points="1757.3566,-683.0858 1747.0383,-685.4906 1756.6042,-690.0453 1757.3566,-683.0858"/>
+<path fill="none" stroke="#191970" d="M2203.8835,-719.3562C2148.2011,-709.3938 2042.8839,-690.5511 1979.7161,-679.2495"/>
+<polygon fill="#191970" stroke="#191970" points="1980.0343,-675.751 1969.5742,-677.435 1978.8015,-682.6416 1980.0343,-675.751"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/arg__info_8h.html b/docs/reference/api/doxygen/arg__info_8h.html
index e60a294e5..f24caac27 100644
--- a/docs/reference/api/doxygen/arg__info_8h.html
+++ b/docs/reference/api/doxygen/arg__info_8h.html
@@ -69,7 +69,8 @@ $(function() {
 <div class="title">arg_info.h File Reference</div>  </div>
 </div><!--header-->
 <div class="contents">
-<div class="textblock"><code>#include &lt;<a class="el" href="node_8h_source.html">tvm/node/node.h</a>&gt;</code><br />
+<div class="textblock"><code>#include &lt;<a class="el" href="ir_2module_8h_source.html">tvm/ir/module.h</a>&gt;</code><br />
+<code>#include &lt;<a class="el" href="node_8h_source.html">tvm/node/node.h</a>&gt;</code><br />
 <code>#include &lt;<a class="el" href="reflection_8h_source.html">tvm/node/reflection.h</a>&gt;</code><br />
 <code>#include &lt;<a class="el" href="shape__tuple_8h_source.html">tvm/runtime/container/shape_tuple.h</a>&gt;</code><br />
 <code>#include &lt;<a class="el" href="data__type_8h_source.html">tvm/runtime/data_type.h</a>&gt;</code><br />
@@ -78,7 +79,7 @@ $(function() {
 </div><div class="textblock"><div class="dynheader">
 Include dependency graph for arg_info.h:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="arg__info_8h__incl.svg" width="4715" height="1306"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="arg__info_8h__incl.svg" width="5182" height="1306"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div><div class="textblock"><div class="dynheader">
diff --git a/docs/reference/api/doxygen/arg__info_8h__dep__incl.svg b/docs/reference/api/doxygen/arg__info_8h__dep__incl.svg
index 467e5f121..a3720d573 100644
--- a/docs/reference/api/doxygen/arg__info_8h__dep__incl.svg
+++ b/docs/reference/api/doxygen/arg__info_8h__dep__incl.svg
@@ -9,16 +9,16 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 437)">
 <title>include/tvm/meta_schedule/arg_info.h</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-437 766.6884,-437 766.6884,4 -4,4"/>
-<!-- Node56 -->
+<!-- Node63 -->
 <g id="node1" class="node">
-<title>Node56</title>
+<title>Node63</title>
 <polygon fill="#bfbfbf" stroke="#000000" points="312.6884,-402.5 312.6884,-432.5 464.6884,-432.5 464.6884,-402.5 312.6884,-402.5"/>
 <text text-anchor="start" x="320.6884" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
 <text text-anchor="middle" x="388.6884" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
 </g>
-<!-- Node57 -->
+<!-- Node64 -->
 <g id="node2" class="node">
-<title>Node57</title>
+<title>Node64</title>
 <g id="a_node2"><a xlink:href="meta__schedule_2cost__model_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/cost_model.h">
 <polygon fill="#ffffff" stroke="#000000" points="468.6884,-268.5 468.6884,-298.5 620.6884,-298.5 620.6884,-268.5 468.6884,-268.5"/>
 <text text-anchor="start" x="476.6884" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -26,15 +26,15 @@
 </a>
 </g>
 </g>
-<!-- Node56&#45;&gt;Node57 -->
+<!-- Node63&#45;&gt;Node64 -->
 <g id="edge1" class="edge">
-<title>Node56&#45;&gt;Node57</title>
+<title>Node63&#45;&gt;Node64</title>
 <path fill="none" stroke="#191970" d="M427.6596,-397.3475C442.7565,-388.6773 459.7288,-377.8167 473.6884,-366 497.9192,-345.4888 520.9242,-316.3688 533.877,-298.7724"/>
 <polygon fill="#191970" stroke="#191970" points="425.5603,-394.5117 418.5458,-402.4519 428.9809,-400.6191 425.5603,-394.5117"/>
 </g>
-<!-- Node58 -->
+<!-- Node65 -->
 <g id="node3" class="node">
-<title>Node58</title>
+<title>Node65</title>
 <g id="a_node3"><a xlink:href="search__strategy_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/search_strategy.h">
 <polygon fill="#ffffff" stroke="#000000" points="364.6884,-201.5 364.6884,-231.5 516.6884,-231.5 516.6884,-201.5 364.6884,-201.5"/>
 <text text-anchor="start" x="372.6884" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -42,15 +42,15 @@
 </a>
 </g>
 </g>
-<!-- Node56&#45;&gt;Node58 -->
+<!-- Node63&#45;&gt;Node65 -->
 <g id="edge24" class="edge">
-<title>Node56&#45;&gt;Node58</title>
+<title>Node63&#45;&gt;Node65</title>
 <path fill="none" stroke="#191970" d="M346.6858,-397.6659C293.7244,-369.9566 213.8854,-317.4821 251.6884,-268 265.7536,-249.5894 318.3842,-236.1879 364.5449,-227.6786"/>
 <polygon fill="#191970" stroke="#191970" points="345.407,-400.9431 355.9035,-402.3832 348.596,-394.7117 345.407,-400.9431"/>
 </g>
-<!-- Node62 -->
+<!-- Node69 -->
 <g id="node7" class="node">
-<title>Node62</title>
+<title>Node69</title>
 <g id="a_node7"><a xlink:href="database_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/database.h">
 <polygon fill="#ffffff" stroke="#000000" points="14.6884,-268.5 14.6884,-298.5 166.6884,-298.5 166.6884,-268.5 14.6884,-268.5"/>
 <text text-anchor="start" x="22.6884" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -58,15 +58,15 @@
 </a>
 </g>
 </g>
-<!-- Node56&#45;&gt;Node62 -->
+<!-- Node63&#45;&gt;Node69 -->
 <g id="edge9" class="edge">
-<title>Node56&#45;&gt;Node62</title>
+<title>Node63&#45;&gt;Node69</title>
 <path fill="none" stroke="#191970" d="M314.4739,-399.8525C284.0771,-391.3952 248.9942,-380.0215 218.6884,-366 177.8979,-347.1276 134.837,-316.9393 110.5088,-298.7864"/>
 <polygon fill="#191970" stroke="#191970" points="313.5978,-403.2414 324.1663,-402.4956 315.4394,-396.4879 313.5978,-403.2414"/>
 </g>
-<!-- Node64 -->
+<!-- Node71 -->
 <g id="node9" class="node">
-<title>Node64</title>
+<title>Node71</title>
 <g id="a_node9"><a xlink:href="measure__candidate_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_candidate.h">
 <polygon fill="#ffffff" stroke="#000000" points="312.6884,-335.5 312.6884,-365.5 464.6884,-365.5 464.6884,-335.5 312.6884,-335.5"/>
 <text text-anchor="start" x="320.6884" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -74,15 +74,15 @@
 </a>
 </g>
 </g>
-<!-- Node56&#45;&gt;Node64 -->
+<!-- Node63&#45;&gt;Node71 -->
 <g id="edge13" class="edge">
-<title>Node56&#45;&gt;Node64</title>
+<title>Node63&#45;&gt;Node71</title>
 <path fill="none" stroke="#191970" d="M388.6884,-392.0249C388.6884,-383.128 388.6884,-373.4287 388.6884,-365.6432"/>
 <polygon fill="#191970" stroke="#191970" points="385.1885,-392.2966 388.6884,-402.2967 392.1885,-392.2967 385.1885,-392.2966"/>
 </g>
-<!-- Node66 -->
+<!-- Node73 -->
 <g id="node11" class="node">
-<title>Node66</title>
+<title>Node73</title>
 <g id="a_node11"><a xlink:href="runner_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/runner.h">
 <polygon fill="#ffffff" stroke="#000000" points="585.6884,-335.5 585.6884,-365.5 737.6884,-365.5 737.6884,-335.5 585.6884,-335.5"/>
 <text text-anchor="start" x="593.6884" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -90,21 +90,21 @@
 </a>
 </g>
 </g>
-<!-- Node56&#45;&gt;Node66 -->
+<!-- Node63&#45;&gt;Node73 -->
 <g id="edge18" class="edge">
-<title>Node56&#45;&gt;Node66</title>
+<title>Node63&#45;&gt;Node73</title>
 <path fill="none" stroke="#191970" d="M459.9028,-400.0225C503.4253,-389.3411 558.5309,-375.8171 600.1547,-365.6017"/>
 <polygon fill="#191970" stroke="#191970" points="458.8325,-396.6812 449.9549,-402.4639 460.501,-403.4795 458.8325,-396.6812"/>
 </g>
-<!-- Node57&#45;&gt;Node58 -->
+<!-- Node64&#45;&gt;Node65 -->
 <g id="edge2" class="edge">
-<title>Node57&#45;&gt;Node58</title>
+<title>Node64&#45;&gt;Node65</title>
 <path fill="none" stroke="#191970" d="M512.5494,-262.7951C496.9661,-252.7558 478.5823,-240.9124 464.31,-231.7177"/>
 <polygon fill="#191970" stroke="#191970" points="511.0467,-265.9904 521.3488,-268.4639 514.8378,-260.1058 511.0467,-265.9904"/>
 </g>
-<!-- Node60 -->
+<!-- Node67 -->
 <g id="node5" class="node">
-<title>Node60</title>
+<title>Node67</title>
 <g id="a_node5"><a xlink:href="task__scheduler_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/task_scheduler.h">
 <polygon fill="#ffffff" stroke="#000000" points="506.6884,-.5 506.6884,-30.5 658.6884,-30.5 658.6884,-.5 506.6884,-.5"/>
 <text text-anchor="start" x="514.6884" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -112,15 +112,15 @@
 </a>
 </g>
 </g>
-<!-- Node57&#45;&gt;Node60 -->
+<!-- Node64&#45;&gt;Node67 -->
 <g id="edge8" class="edge">
-<title>Node57&#45;&gt;Node60</title>
+<title>Node64&#45;&gt;Node67</title>
 <path fill="none" stroke="#191970" d="M578.254,-262.7238C608.068,-242.0992 649.6313,-207.4702 667.6884,-165 689.6508,-113.3445 633.5149,-56.9316 602.1431,-30.607"/>
 <polygon fill="#191970" stroke="#191970" points="576.1413,-259.9269 569.8024,-268.4162 580.0518,-265.7328 576.1413,-259.9269"/>
 </g>
-<!-- Node59 -->
+<!-- Node66 -->
 <g id="node4" class="node">
-<title>Node59</title>
+<title>Node66</title>
 <g id="a_node4"><a xlink:href="measure__callback_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_callback.h">
 <polygon fill="#ffffff" stroke="#000000" points="402.6884,-67.5 402.6884,-97.5 554.6884,-97.5 554.6884,-67.5 402.6884,-67.5"/>
 <text text-anchor="start" x="410.6884" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -128,15 +128,15 @@
 </a>
 </g>
 </g>
-<!-- Node58&#45;&gt;Node59 -->
+<!-- Node65&#45;&gt;Node66 -->
 <g id="edge3" class="edge">
-<title>Node58&#45;&gt;Node59</title>
+<title>Node65&#45;&gt;Node66</title>
 <path fill="none" stroke="#191970" d="M447.7195,-191.706C455.5208,-164.1962 467.8634,-120.6723 474.3606,-97.7614"/>
 <polygon fill="#191970" stroke="#191970" points="444.3347,-190.8134 444.9736,-201.389 451.0692,-192.7232 444.3347,-190.8134"/>
 </g>
-<!-- Node61 -->
+<!-- Node68 -->
 <g id="node6" class="node">
-<title>Node61</title>
+<title>Node68</title>
 <g id="a_node6"><a xlink:href="tune__context_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/tune_context.h">
 <polygon fill="#ffffff" stroke="#000000" points="506.6884,-134.5 506.6884,-164.5 658.6884,-164.5 658.6884,-134.5 506.6884,-134.5"/>
 <text text-anchor="start" x="514.6884" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -144,45 +144,45 @@
 </a>
 </g>
 </g>
-<!-- Node58&#45;&gt;Node61 -->
+<!-- Node65&#45;&gt;Node68 -->
 <g id="edge5" class="edge">
-<title>Node58&#45;&gt;Node61</title>
+<title>Node65&#45;&gt;Node68</title>
 <path fill="none" stroke="#191970" d="M481.6137,-197.1902C503.4895,-186.8685 530.0381,-174.3421 550.4359,-164.7177"/>
 <polygon fill="#191970" stroke="#191970" points="480.1063,-194.0313 472.556,-201.4639 483.0934,-200.362 480.1063,-194.0313"/>
 </g>
-<!-- Node59&#45;&gt;Node60 -->
+<!-- Node66&#45;&gt;Node67 -->
 <g id="edge4" class="edge">
-<title>Node59&#45;&gt;Node60</title>
+<title>Node66&#45;&gt;Node67</title>
 <path fill="none" stroke="#191970" d="M510.8274,-61.7951C526.4107,-51.7558 544.7945,-39.9124 559.0669,-30.7177"/>
 <polygon fill="#191970" stroke="#191970" points="508.5391,-59.1058 502.028,-67.4639 512.3301,-64.9904 508.5391,-59.1058"/>
 </g>
-<!-- Node61&#45;&gt;Node59 -->
+<!-- Node68&#45;&gt;Node66 -->
 <g id="edge6" class="edge">
-<title>Node61&#45;&gt;Node59</title>
+<title>Node68&#45;&gt;Node66</title>
 <path fill="none" stroke="#191970" d="M550.5494,-128.7951C534.9661,-118.7558 516.5823,-106.9124 502.31,-97.7177"/>
 <polygon fill="#191970" stroke="#191970" points="549.0467,-131.9904 559.3488,-134.4639 552.8378,-126.1058 549.0467,-131.9904"/>
 </g>
-<!-- Node61&#45;&gt;Node60 -->
+<!-- Node68&#45;&gt;Node67 -->
 <g id="edge7" class="edge">
-<title>Node61&#45;&gt;Node60</title>
+<title>Node68&#45;&gt;Node67</title>
 <path fill="none" stroke="#191970" d="M582.6884,-124.3415C582.6884,-96.8131 582.6884,-53.5714 582.6884,-30.7614"/>
 <polygon fill="#191970" stroke="#191970" points="579.1885,-124.3889 582.6884,-134.389 586.1885,-124.389 579.1885,-124.3889"/>
 </g>
-<!-- Node62&#45;&gt;Node58 -->
+<!-- Node69&#45;&gt;Node65 -->
 <g id="edge11" class="edge">
-<title>Node62&#45;&gt;Node58</title>
+<title>Node69&#45;&gt;Node65</title>
 <path fill="none" stroke="#191970" d="M177.2131,-266.9367C234.6328,-255.9449 309.2965,-241.6522 364.4747,-231.0895"/>
 <polygon fill="#191970" stroke="#191970" points="176.2285,-263.5616 167.0649,-268.8794 177.5446,-270.4367 176.2285,-263.5616"/>
 </g>
-<!-- Node62&#45;&gt;Node60 -->
+<!-- Node69&#45;&gt;Node67 -->
 <g id="edge12" class="edge">
-<title>Node62&#45;&gt;Node60</title>
+<title>Node69&#45;&gt;Node67</title>
 <path fill="none" stroke="#191970" d="M39.5373,-263.7472C26.1452,-255.9655 13.3301,-245.5649 5.6884,-232 -1.074,-219.996 -2.3072,-212.2204 5.6884,-201 64.7295,-118.1466 363.8026,-54.6197 506.5932,-28.4969"/>
 <polygon fill="#191970" stroke="#191970" points="37.9063,-266.8441 48.3725,-268.4897 41.2169,-260.6764 37.9063,-266.8441"/>
 </g>
-<!-- Node63 -->
+<!-- Node70 -->
 <g id="node8" class="node">
-<title>Node63</title>
+<title>Node70</title>
 <g id="a_node8"><a xlink:href="apply__history__best_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/apply_history_best.h">
 <polygon fill="#ffffff" stroke="#000000" points="14.6884,-201.5 14.6884,-231.5 166.6884,-231.5 166.6884,-201.5 14.6884,-201.5"/>
 <text text-anchor="start" x="22.6884" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -190,33 +190,33 @@
 </a>
 </g>
 </g>
-<!-- Node62&#45;&gt;Node63 -->
+<!-- Node69&#45;&gt;Node70 -->
 <g id="edge10" class="edge">
-<title>Node62&#45;&gt;Node63</title>
+<title>Node69&#45;&gt;Node70</title>
 <path fill="none" stroke="#191970" d="M90.6884,-258.0249C90.6884,-249.128 90.6884,-239.4287 90.6884,-231.6432"/>
 <polygon fill="#191970" stroke="#191970" points="87.1885,-258.2966 90.6884,-268.2967 94.1885,-258.2967 87.1885,-258.2966"/>
 </g>
-<!-- Node64&#45;&gt;Node57 -->
+<!-- Node71&#45;&gt;Node64 -->
 <g id="edge14" class="edge">
-<title>Node64&#45;&gt;Node57</title>
+<title>Node71&#45;&gt;Node64</title>
 <path fill="none" stroke="#191970" d="M433.04,-331.4516C457.2837,-321.0392 486.8929,-308.3224 509.5263,-298.6017"/>
 <polygon fill="#191970" stroke="#191970" points="431.505,-328.3016 423.6979,-335.4639 434.2675,-334.7335 431.505,-328.3016"/>
 </g>
-<!-- Node64&#45;&gt;Node58 -->
+<!-- Node71&#45;&gt;Node65 -->
 <g id="edge17" class="edge">
-<title>Node64&#45;&gt;Node58</title>
+<title>Node71&#45;&gt;Node65</title>
 <path fill="none" stroke="#191970" d="M406.0236,-327.1997C411.7006,-318.6788 417.5942,-308.7351 421.6884,-299 431.1471,-276.5096 436.328,-248.4976 438.8018,-231.5813"/>
 <polygon fill="#191970" stroke="#191970" points="403.0929,-325.2841 400.2705,-335.4961 408.8452,-329.273 403.0929,-325.2841"/>
 </g>
-<!-- Node64&#45;&gt;Node59 -->
+<!-- Node71&#45;&gt;Node66 -->
 <g id="edge16" class="edge">
-<title>Node64&#45;&gt;Node59</title>
+<title>Node71&#45;&gt;Node66</title>
 <path fill="none" stroke="#191970" d="M319.7732,-332.5322C290.1015,-323.2721 260.2377,-311.3471 251.6884,-299 243.8452,-287.6726 245.7846,-280.4488 251.6884,-268 292.1431,-182.6968 393.1585,-123.6182 445.7277,-97.5482"/>
 <polygon fill="#191970" stroke="#191970" points="318.7875,-335.8906 329.3729,-335.4422 320.8183,-329.1916 318.7875,-335.8906"/>
 </g>
-<!-- Node65 -->
+<!-- Node72 -->
 <g id="node10" class="node">
-<title>Node65</title>
+<title>Node72</title>
 <g id="a_node10"><a xlink:href="feature__extractor_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/feature_extractor.h">
 <polygon fill="#ffffff" stroke="#000000" points="260.6884,-268.5 260.6884,-298.5 412.6884,-298.5 412.6884,-268.5 260.6884,-268.5"/>
 <text text-anchor="start" x="268.6884" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -224,39 +224,39 @@
 </a>
 </g>
 </g>
-<!-- Node64&#45;&gt;Node65 -->
+<!-- Node71&#45;&gt;Node72 -->
 <g id="edge15" class="edge">
-<title>Node64&#45;&gt;Node65</title>
+<title>Node71&#45;&gt;Node72</title>
 <path fill="none" stroke="#191970" d="M370.7219,-327.3509C363.3467,-317.8482 355.0151,-307.1132 348.4413,-298.6432"/>
 <polygon fill="#191970" stroke="#191970" points="367.9926,-329.5427 376.8888,-335.2967 373.5225,-325.2508 367.9926,-329.5427"/>
 </g>
-<!-- Node66&#45;&gt;Node57 -->
+<!-- Node73&#45;&gt;Node64 -->
 <g id="edge19" class="edge">
-<title>Node66&#45;&gt;Node57</title>
+<title>Node73&#45;&gt;Node64</title>
 <path fill="none" stroke="#191970" d="M626.5118,-330.3561C608.7756,-320.1995 587.6192,-308.0843 571.2627,-298.7177"/>
 <polygon fill="#191970" stroke="#191970" points="625.0142,-333.5317 635.4313,-335.4639 628.4928,-327.4572 625.0142,-333.5317"/>
 </g>
-<!-- Node66&#45;&gt;Node58 -->
+<!-- Node73&#45;&gt;Node65 -->
 <g id="edge21" class="edge">
-<title>Node66&#45;&gt;Node58</title>
+<title>Node73&#45;&gt;Node65</title>
 <path fill="none" stroke="#191970" d="M658.0919,-325.3637C654.1691,-306.9874 646.098,-282.7862 629.6884,-268 612.7292,-252.7186 561.5804,-239.3017 516.7649,-229.9964"/>
 <polygon fill="#191970" stroke="#191970" points="654.6876,-326.2017 659.954,-335.395 661.5701,-324.9242 654.6876,-326.2017"/>
 </g>
-<!-- Node66&#45;&gt;Node59 -->
+<!-- Node73&#45;&gt;Node66 -->
 <g id="edge20" class="edge">
-<title>Node66&#45;&gt;Node59</title>
+<title>Node73&#45;&gt;Node66</title>
 <path fill="none" stroke="#191970" d="M664.3028,-324.8312C664.8242,-307.5721 663.0071,-284.9641 652.6884,-268 609.7049,-197.3339 544.7537,-233.0158 497.6884,-165 483.5542,-144.5741 479.8012,-115.1627 478.8828,-97.5313"/>
 <polygon fill="#191970" stroke="#191970" points="660.7811,-325.0993 663.6882,-335.2874 667.769,-325.51 660.7811,-325.0993"/>
 </g>
-<!-- Node66&#45;&gt;Node60 -->
+<!-- Node73&#45;&gt;Node67 -->
 <g id="edge22" class="edge">
-<title>Node66&#45;&gt;Node60</title>
+<title>Node73&#45;&gt;Node67</title>
 <path fill="none" stroke="#191970" d="M692.1544,-328.9659C721.8909,-305.1754 762.6884,-263.8987 762.6884,-216.5 762.6884,-216.5 762.6884,-216.5 762.6884,-149.5 762.6884,-85.9975 689.7145,-49.0364 636.6943,-30.5625"/>
 <polygon fill="#191970" stroke="#191970" points="689.8654,-326.3117 684.1036,-335.2029 694.1524,-331.8454 689.8654,-326.3117"/>
 </g>
-<!-- Node66&#45;&gt;Node61 -->
+<!-- Node73&#45;&gt;Node68 -->
 <g id="edge23" class="edge">
-<title>Node66&#45;&gt;Node61</title>
+<title>Node73&#45;&gt;Node68</title>
 <path fill="none" stroke="#191970" d="M684.0219,-327.6413C690.7196,-319.3061 697.2064,-309.3658 700.6884,-299 705.0757,-285.9394 705.9475,-280.7346 700.6884,-268 681.6825,-221.9782 635.1837,-184.4361 606.5579,-164.6667"/>
 <polygon fill="#191970" stroke="#191970" points="681.2196,-325.5321 677.3573,-335.3979 686.529,-330.094 681.2196,-325.5321"/>
 </g>
diff --git a/docs/reference/api/doxygen/arg__info_8h__incl.svg b/docs/reference/api/doxygen/arg__info_8h__incl.svg
index 7a374ef38..7df06d658 100644
--- a/docs/reference/api/doxygen/arg__info_8h__incl.svg
+++ b/docs/reference/api/doxygen/arg__info_8h__incl.svg
@@ -4,1580 +4,1713 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: include/tvm/meta_schedule/arg_info.h Pages: 1 -->
-<svg width="3536pt" height="979pt"
- viewBox="0.00 0.00 3536.00 979.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="3886pt" height="979pt"
+ viewBox="0.00 0.00 3886.00 979.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 975)">
 <title>include/tvm/meta_schedule/arg_info.h</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-975 3532,-975 3532,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-975 3882,-975 3882,4 -4,4"/>
 <!-- Node0 -->
 <g id="node1" class="node">
 <title>Node0</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="1977,-940.5 1977,-970.5 2129,-970.5 2129,-940.5 1977,-940.5"/>
-<text text-anchor="start" x="1985" y="-958.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2053" y="-947.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="498,-940.5 498,-970.5 650,-970.5 650,-940.5 498,-940.5"/>
+<text text-anchor="start" x="506" y="-958.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="574" y="-947.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
 </g>
 <!-- Node1 -->
 <g id="node2" class="node">
 <title>Node1</title>
-<g id="a_node2"><a xlink:href="node_8h.html" target="_top" xlink:title="Definitions and helper macros for IR/AST nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="1773.5,-548.5 1773.5,-567.5 1872.5,-567.5 1872.5,-548.5 1773.5,-548.5"/>
-<text text-anchor="middle" x="1823" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/node.h</text>
+<g id="a_node2"><a xlink:href="ir_2module_8h.html" target="_top" xlink:title="IRModule that holds the functions and type definitions. ">
+<polygon fill="#ffffff" stroke="#000000" points="1467.5,-828.5 1467.5,-847.5 1562.5,-847.5 1562.5,-828.5 1467.5,-828.5"/>
+<text text-anchor="middle" x="1515" y="-835.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/module.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node1 -->
 <g id="edge1" class="edge">
 <title>Node0&#45;&gt;Node1</title>
-<path fill="none" stroke="#191970" d="M2065.7073,-940.2644C2082.2296,-918.8562 2109,-877.9978 2109,-838 2109,-838 2109,-838 2109,-726 2109,-668.96 1937.7309,-599.5951 1860.2657,-571.1386"/>
-<polygon fill="#191970" stroke="#191970" points="1861.0169,-567.6875 1850.4229,-567.5579 1858.6238,-574.2657 1861.0169,-567.6875"/>
+<path fill="none" stroke="#191970" d="M650.2089,-945.984C833.238,-923.1297 1294.4595,-865.5383 1457.118,-845.2276"/>
+<polygon fill="#191970" stroke="#191970" points="1457.9049,-848.6566 1467.3941,-843.9444 1457.0375,-841.7105 1457.9049,-848.6566"/>
 </g>
-<!-- Node2 -->
-<g id="node3" class="node">
-<title>Node2</title>
-<g id="a_node3"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="1100.5,-492.5 1100.5,-511.5 1221.5,-511.5 1221.5,-492.5 1100.5,-492.5"/>
-<text text-anchor="middle" x="1161" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/reflection.h</text>
+<!-- Node5 -->
+<g id="node6" class="node">
+<title>Node5</title>
+<g id="a_node6"><a xlink:href="node_8h.html" target="_top" xlink:title="Definitions and helper macros for IR/AST nodes. ">
+<polygon fill="#ffffff" stroke="#000000" points="1467.5,-492.5 1467.5,-511.5 1566.5,-511.5 1566.5,-492.5 1467.5,-492.5"/>
+<text text-anchor="middle" x="1517" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/node.h</text>
 </a>
 </g>
 </g>
-<!-- Node0&#45;&gt;Node2 -->
-<g id="edge132" class="edge">
-<title>Node0&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M1976.836,-955.1192C1709.5589,-953.0019 825.9473,-939.1126 742,-848 686.9551,-788.2568 580.8281,-949.9152 876,-716 972.5823,-639.4613 1091.3846,-552.4888 1139.3759,-517.643"/>
-<polygon fill="#191970" stroke="#191970" points="1141.7069,-520.2761 1147.7476,-511.572 1137.5974,-514.6093 1141.7069,-520.2761"/>
+<!-- Node0&#45;&gt;Node5 -->
+<g id="edge181" class="edge">
+<title>Node0&#45;&gt;Node5</title>
+<path fill="none" stroke="#191970" d="M565.8393,-940.3373C558.9717,-925.4003 551.5165,-902.2349 560,-884 625.1951,-743.8665 687.67,-728.9393 826,-660 1042.4017,-552.1523 1333.6562,-516.7803 1457.3982,-506.1619"/>
+<polygon fill="#191970" stroke="#191970" points="1457.7577,-509.6441 1467.4317,-505.324 1457.1751,-502.6684 1457.7577,-509.6441"/>
 </g>
 <!-- Node6 -->
 <g id="node7" class="node">
 <title>Node6</title>
-<g id="a_node7"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
-<polygon fill="#ffffff" stroke="#000000" points="1366.5,-123.5 1366.5,-142.5 1485.5,-142.5 1485.5,-123.5 1366.5,-123.5"/>
-<text text-anchor="middle" x="1426" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/object.h</text>
+<g id="a_node7"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
+<polygon fill="#ffffff" stroke="#000000" points="1367.5,-436.5 1367.5,-455.5 1488.5,-455.5 1488.5,-436.5 1367.5,-436.5"/>
+<text text-anchor="middle" x="1428" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/reflection.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node6 -->
-<g id="edge135" class="edge">
+<g id="edge182" class="edge">
 <title>Node0&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M1976.8835,-955.2022C1605.336,-953.2597 0,-939.0218 0,-838 0,-838 0,-838 0,-726 0,-665.0505 892.9419,-197.55 951,-179 1024.5658,-155.4951 1242.894,-141.8292 1356.1258,-136.1375"/>
-<polygon fill="#191970" stroke="#191970" points="1356.6012,-139.6184 1366.4157,-135.6277 1356.2547,-132.6269 1356.6012,-139.6184"/>
+<path fill="none" stroke="#191970" d="M574,-940.2899C574,-917.892 574,-874.7188 574,-838 574,-838 574,-838 574,-782 574,-612.9659 1160.6922,-493.3101 1361.5163,-457.3034"/>
+<polygon fill="#191970" stroke="#191970" points="1362.4049,-460.7003 1371.6364,-455.5013 1361.1777,-453.8087 1362.4049,-460.7003"/>
 </g>
-<!-- Node24 -->
-<g id="node25" class="node">
-<title>Node24</title>
-<g id="a_node25"><a xlink:href="data__type_8h.html" target="_top" xlink:title="tvm/runtime/data_type.h">
-<polygon fill="#ffffff" stroke="#000000" points="1760,-297 1760,-316 1898,-316 1898,-297 1760,-297"/>
-<text text-anchor="middle" x="1829" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/data_type.h</text>
+<!-- Node10 -->
+<g id="node11" class="node">
+<title>Node10</title>
+<g id="a_node11"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
+<polygon fill="#ffffff" stroke="#000000" points="1260.5,-67.5 1260.5,-86.5 1379.5,-86.5 1379.5,-67.5 1260.5,-67.5"/>
+<text text-anchor="middle" x="1320" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/object.h</text>
 </a>
 </g>
 </g>
-<!-- Node0&#45;&gt;Node24 -->
-<g id="edge134" class="edge">
-<title>Node0&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M2129.0344,-952.7407C2335.1176,-944.2564 2889,-914.3514 2889,-838 2889,-838 2889,-838 2889,-782 2889,-617.9818 2391.4708,-494.7022 2243,-425 2203.2542,-406.3406 2189.5578,-410.0047 2151,-389 2130.311,-377.7294 2129.707,-367.158 2108,-358 2022.7695,-322.0418 1993.8379,-339.4846 1903,-322 1896.7406,-320.7952 1890.193,-319.4897 1883.692,-318.1651"/>
-<polygon fill="#191970" stroke="#191970" points="1884.0627,-314.6681 1873.5623,-316.0795 1882.651,-321.5243 1884.0627,-314.6681"/>
+<!-- Node0&#45;&gt;Node10 -->
+<g id="edge185" class="edge">
+<title>Node0&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M497.579,-949.9023C375.9702,-938.8673 152,-908.9174 152,-838 152,-838 152,-838 152,-317.5 152,-178.3926 610.6111,-150.367 747,-123 841.4353,-104.0512 1119.2184,-87.5256 1250.1754,-80.5416"/>
+<polygon fill="#191970" stroke="#191970" points="1250.6845,-84.0196 1260.4853,-79.9953 1250.3141,-77.0294 1250.6845,-84.0196"/>
 </g>
 <!-- Node28 -->
-<g id="node29" class="node">
+<g id="node25" class="node">
 <title>Node28</title>
-<g id="a_node29"><a xlink:href="shape__tuple_8h.html" target="_top" xlink:title="Runtime ShapeTuple container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="2660,-291.5 2660,-321.5 2786,-321.5 2786,-291.5 2660,-291.5"/>
-<text text-anchor="start" x="2668" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="2723" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/shape_tuple.h</text>
+<g id="a_node25"><a xlink:href="data__type_8h.html" target="_top" xlink:title="tvm/runtime/data_type.h">
+<polygon fill="#ffffff" stroke="#000000" points="1994,-241 1994,-260 2132,-260 2132,-241 1994,-241"/>
+<text text-anchor="middle" x="2063" y="-248" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/data_type.h</text>
 </a>
 </g>
 </g>
 <!-- Node0&#45;&gt;Node28 -->
-<g id="edge133" class="edge">
+<g id="edge184" class="edge">
 <title>Node0&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2129.1964,-953.462C2431.2352,-944.8904 3528,-908.8849 3528,-838 3528,-838 3528,-838 3528,-670 3528,-331.875 2786,-840.125 2786,-502 2786,-502 2786,-502 2786,-440.5 2786,-398.6482 2760.5203,-355.8333 2741.9688,-330.2067"/>
-<polygon fill="#191970" stroke="#191970" points="2744.5406,-327.8015 2735.749,-321.8889 2738.9346,-331.9935 2744.5406,-327.8015"/>
+<path fill="none" stroke="#191970" d="M650.0397,-954.5577C984.2476,-950.2836 2324.7158,-931.7195 2745,-904 2975.4003,-888.8041 3623.7788,-963.846 3778,-792 3846.2893,-715.9064 3624.2696,-452.8636 3175,-302 3150.5343,-293.7845 2392.2947,-263.4081 2142.1722,-253.5856"/>
+<polygon fill="#191970" stroke="#191970" points="2142.1968,-250.084 2132.0672,-253.1891 2141.9223,-257.0786 2142.1968,-250.084"/>
 </g>
-<!-- Node45 -->
-<g id="node43" class="node">
-<title>Node45</title>
-<g id="a_node43"><a xlink:href="tir_2function_8h.html" target="_top" xlink:title="TIR Function. ">
-<polygon fill="#ffffff" stroke="#000000" points="1912.5,-884.5 1912.5,-903.5 2013.5,-903.5 2013.5,-884.5 1912.5,-884.5"/>
-<text text-anchor="middle" x="1963" y="-891.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/function.h</text>
+<!-- Node32 -->
+<g id="node29" class="node">
+<title>Node32</title>
+<g id="a_node29"><a xlink:href="shape__tuple_8h.html" target="_top" xlink:title="Runtime ShapeTuple container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="1198,-235.5 1198,-265.5 1324,-265.5 1324,-235.5 1198,-235.5"/>
+<text text-anchor="start" x="1206" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="1261" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/shape_tuple.h</text>
 </a>
 </g>
 </g>
-<!-- Node0&#45;&gt;Node45 -->
-<g id="edge136" class="edge">
-<title>Node0&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M2030.7528,-940.2977C2017.0033,-930.9022 1999.4079,-918.8787 1985.5943,-909.4395"/>
-<polygon fill="#191970" stroke="#191970" points="1987.3725,-906.4155 1977.1414,-903.6633 1983.4232,-912.195 1987.3725,-906.4155"/>
+<!-- Node0&#45;&gt;Node32 -->
+<g id="edge183" class="edge">
+<title>Node0&#45;&gt;Node32</title>
+<path fill="none" stroke="#191970" d="M524.65,-940.3566C480.2669,-923.3113 422,-890.8665 422,-838 422,-838 422,-838 422,-614 422,-570.7203 1031.0335,-337.3231 1211.3074,-269.1855"/>
+<polygon fill="#191970" stroke="#191970" points="1212.6974,-272.4019 1220.8159,-265.5946 1210.2243,-265.8533 1212.6974,-272.4019"/>
+</g>
+<!-- Node58 -->
+<g id="node47" class="node">
+<title>Node58</title>
+<g id="a_node47"><a xlink:href="tir_2function_8h.html" target="_top" xlink:title="TIR Function. ">
+<polygon fill="#ffffff" stroke="#000000" points="2635.5,-884.5 2635.5,-903.5 2736.5,-903.5 2736.5,-884.5 2635.5,-884.5"/>
+<text text-anchor="middle" x="2686" y="-891.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/function.h</text>
+</a>
+</g>
+</g>
+<!-- Node0&#45;&gt;Node58 -->
+<g id="edge186" class="edge">
+<title>Node0&#45;&gt;Node58</title>
+<path fill="none" stroke="#191970" d="M650.1021,-953.284C984.6704,-943.5416 2321.6588,-904.6094 2624.7062,-895.7848"/>
+<polygon fill="#191970" stroke="#191970" points="2625.1531,-899.2734 2635.047,-895.4837 2624.9493,-892.2764 2625.1531,-899.2734"/>
+</g>
+<!-- Node2 -->
+<g id="node3" class="node">
+<title>Node2</title>
+<g id="a_node3"><a xlink:href="ir_2adt_8h.html" target="_top" xlink:title="Algebraic data type definitions. ">
+<polygon fill="#ffffff" stroke="#000000" points="1736,-716.5 1736,-735.5 1810,-735.5 1810,-716.5 1736,-716.5"/>
+<text text-anchor="middle" x="1773" y="-723.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/adt.h</text>
+</a>
+</g>
 </g>
 <!-- Node1&#45;&gt;Node2 -->
 <g id="edge2" class="edge">
 <title>Node1&#45;&gt;Node2</title>
-<path fill="none" stroke="#191970" d="M1773.4082,-553.9229C1677.1256,-545.9898 1458.1549,-527.8634 1274,-512 1260.342,-510.8235 1245.7573,-509.5475 1231.7595,-508.313"/>
-<polygon fill="#191970" stroke="#191970" points="1231.9616,-504.8173 1221.6923,-507.4235 1231.3454,-511.7902 1231.9616,-504.8173"/>
+<path fill="none" stroke="#191970" d="M1536.9969,-828.4509C1582.8843,-808.5308 1688.9329,-762.4942 1741.8273,-739.5323"/>
+<polygon fill="#191970" stroke="#191970" points="1743.2795,-742.7175 1751.0587,-735.5249 1740.492,-736.2965 1743.2795,-742.7175"/>
 </g>
 <!-- Node3 -->
 <g id="node4" class="node">
 <title>Node3</title>
-<g id="a_node4"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
-<polygon fill="#ffffff" stroke="#000000" points="1813.5,-358.5 1813.5,-388.5 1926.5,-388.5 1926.5,-358.5 1813.5,-358.5"/>
-<text text-anchor="start" x="1821.5" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
-<text text-anchor="middle" x="1870" y="-365.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
+<g id="a_node4"><a xlink:href="ir_2expr_8h.html" target="_top" xlink:title="Base expr nodes in TVM. ">
+<polygon fill="#ffffff" stroke="#000000" points="2716.5,-660.5 2716.5,-679.5 2795.5,-679.5 2795.5,-660.5 2716.5,-660.5"/>
+<text text-anchor="middle" x="2756" y="-667.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node1&#45;&gt;Node3 -->
-<g id="edge123" class="edge">
+<g id="edge141" class="edge">
 <title>Node1&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1802.8439,-548.4543C1789.1185,-540.7394 1772.0365,-528.4561 1764,-512 1760.0993,-504.0127 1760.828,-500.3037 1764,-492 1779.3762,-451.7476 1815.415,-416.5229 1841.1994,-395.1617"/>
-<polygon fill="#191970" stroke="#191970" points="1843.5899,-397.73 1849.1715,-388.7246 1839.1923,-392.2838 1843.5899,-397.73"/>
+<path fill="none" stroke="#191970" d="M1562.6414,-835.1627C1742.1253,-824.2022 2377.26,-782.896 2574,-736 2628.6895,-722.9639 2689.6139,-698.7852 2725.2757,-683.591"/>
+<polygon fill="#191970" stroke="#191970" points="2726.837,-686.7295 2734.638,-679.5604 2724.069,-680.3 2726.837,-686.7295"/>
 </g>
-<!-- Node1&#45;&gt;Node6 -->
-<g id="edge127" class="edge">
-<title>Node1&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M1773.0901,-550.7364C1647.0117,-531.2603 1321.3018,-473.5552 1259,-389 1250.8272,-377.908 1250.4699,-368.8196 1259,-358 1301.447,-304.1604 1364.1817,-373.8822 1409,-322 1450.7514,-273.6681 1439.0169,-190.1517 1430.8608,-152.4566"/>
-<polygon fill="#191970" stroke="#191970" points="1434.27,-151.6642 1428.6049,-142.7111 1427.4503,-153.2428 1434.27,-151.6642"/>
+<!-- Node16 -->
+<g id="node14" class="node">
+<title>Node16</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="3212,-6 3212,-25 3256,-25 3256,-6 3212,-6"/>
+<text text-anchor="middle" x="3234" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">string</text>
 </g>
-<!-- Node7 -->
-<g id="node8" class="node">
-<title>Node7</title>
-<g id="a_node8"><a xlink:href="c__runtime__api_8h.html" target="_top" xlink:title="tvm/runtime/c_runtime\l_api.h">
-<polygon fill="#ffffff" stroke="#000000" points="1901.5,-56.5 1901.5,-86.5 2030.5,-86.5 2030.5,-56.5 1901.5,-56.5"/>
-<text text-anchor="start" x="1909.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/c_runtime</text>
-<text text-anchor="middle" x="1966" y="-63.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_api.h</text>
-</a>
+<!-- Node1&#45;&gt;Node16 -->
+<g id="edge176" class="edge">
+<title>Node1&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1562.6993,-837.2247C1791.4162,-833.4115 2777.3284,-815.8664 3084,-792 3281.4179,-776.6361 3335.3508,-789.5049 3526,-736 3666.5351,-696.5594 3802,-703.9646 3802,-558 3802,-558 3802,-558 3802,-133 3802,-98.6999 3791.032,-85.2656 3762,-67 3719.9872,-40.5674 3374.3295,-22.1331 3266.2022,-16.9699"/>
+<polygon fill="#191970" stroke="#191970" points="3266.3645,-13.4737 3256.2106,-16.4984 3266.0345,-20.4659 3266.3645,-13.4737"/>
 </g>
+<!-- Node18 -->
+<g id="node16" class="node">
+<title>Node18</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1095.5,-6 1095.5,-25 1140.5,-25 1140.5,-6 1095.5,-6"/>
+<text text-anchor="middle" x="1118" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">utility</text>
 </g>
-<!-- Node1&#45;&gt;Node7 -->
-<g id="edge125" class="edge">
-<title>Node1&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M1872.5271,-555.2716C2068.5699,-543.7998 2789.2116,-495.0817 2988,-389 3058.7935,-351.2217 3080.1902,-329.8927 3109,-255 3146.5139,-157.4804 3036.8443,-200.9368 2934,-179 2605.9185,-109.0199 2204.978,-82.9424 2041.1486,-74.7498"/>
-<polygon fill="#191970" stroke="#191970" points="2040.8637,-71.2316 2030.7037,-74.2357 2040.5195,-78.2231 2040.8637,-71.2316"/>
+<!-- Node1&#45;&gt;Node18 -->
+<g id="edge179" class="edge">
+<title>Node1&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1467.1569,-837.0265C1209.6366,-831.4799 0,-801.4128 0,-726 0,-726 0,-726 0,-317.5 0,-122.2117 172.6151,-118.465 361,-67 500.5236,-28.8835 957.7081,-18.2647 1084.9005,-16.0157"/>
+<polygon fill="#191970" stroke="#191970" points="1085.2262,-19.5107 1095.1648,-15.8399 1085.1063,-12.5117 1085.2262,-19.5107"/>
 </g>
-<!-- Node12 -->
-<g id="node13" class="node">
-<title>Node12</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="358,-62 358,-81 402,-81 402,-62 358,-62"/>
-<text text-anchor="middle" x="380" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">string</text>
+<!-- Node20 -->
+<g id="node18" class="node">
+<title>Node20</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1013.5,-179.5 1013.5,-198.5 1060.5,-198.5 1060.5,-179.5 1013.5,-179.5"/>
+<text text-anchor="middle" x="1037" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">vector</text>
 </g>
-<!-- Node1&#45;&gt;Node12 -->
-<g id="edge128" class="edge">
-<title>Node1&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M1773.3365,-556.6333C1618.0614,-552.1429 1143.0752,-536.7016 991,-512 786.3456,-478.7579 701.6761,-520.0406 541,-389 440.6306,-307.1429 396.6987,-146.5424 384.0631,-90.9711"/>
-<polygon fill="#191970" stroke="#191970" points="387.4469,-90.0612 381.889,-81.0412 380.6089,-91.5584 387.4469,-90.0612"/>
+<!-- Node1&#45;&gt;Node20 -->
+<g id="edge180" class="edge">
+<title>Node1&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1467.4573,-837.4669C1362.2887,-835.6438 1102.2667,-827.5878 888,-792 791.0149,-775.8916 762.7184,-778.3629 674,-736 583.1354,-692.6123 551.5736,-664.251 522,-568 477.9447,-424.6166 152.3384,-605.0431 703,-235 751.6209,-202.3269 928.3884,-192.6253 1003.1947,-189.9475"/>
+<polygon fill="#191970" stroke="#191970" points="1003.5881,-193.4363 1013.4645,-189.6015 1003.3523,-186.4403 1003.5881,-193.4363"/>
 </g>
-<!-- Node13 -->
-<g id="node14" class="node">
-<title>Node13</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="853.5,-62 853.5,-81 922.5,-81 922.5,-62 853.5,-62"/>
-<text text-anchor="middle" x="888" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">type_traits</text>
+<!-- Node21 -->
+<g id="node19" class="node">
+<title>Node21</title>
+<g id="a_node19"><a xlink:href="array_8h.html" target="_top" xlink:title="Runtime Array container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="1722,-302.5 1722,-332.5 1848,-332.5 1848,-302.5 1722,-302.5"/>
+<text text-anchor="start" x="1730" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="1785" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/array.h</text>
+</a>
 </g>
-<!-- Node1&#45;&gt;Node13 -->
-<g id="edge129" class="edge">
-<title>Node1&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1773.2369,-556.87C1611.4842,-552.9501 1107.0672,-538.696 1039,-512 936.2242,-471.6912 910.2351,-427.8868 879,-322 853.9683,-237.1426 860.7295,-210.4714 874,-123 875.6358,-112.2177 878.727,-100.4634 881.5894,-90.9103"/>
-<polygon fill="#191970" stroke="#191970" points="884.9727,-91.8184 884.6341,-81.229 878.2952,-89.7182 884.9727,-91.8184"/>
 </g>
-<!-- Node14 -->
-<g id="node15" class="node">
-<title>Node14</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="3009.5,-62 3009.5,-81 3054.5,-81 3054.5,-62 3009.5,-62"/>
-<text text-anchor="middle" x="3032" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">utility</text>
+<!-- Node1&#45;&gt;Node21 -->
+<g id="edge173" class="edge">
+<title>Node1&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M1562.602,-831.2307C1626.1924,-820.2628 1740.0021,-793.7216 1819,-736 1917.576,-663.9733 1928.0229,-623.3884 1978,-512 1992.2098,-480.3295 2014.7083,-465.2393 1996,-436 1975.4223,-403.8389 1950.1103,-420.6569 1918,-400 1900.2425,-388.5764 1899.4327,-380.9134 1882,-369 1864.8661,-357.2907 1844.7671,-346.2286 1827.3217,-337.3801"/>
+<polygon fill="#191970" stroke="#191970" points="1828.4496,-334.0316 1817.9385,-332.702 1825.3262,-340.2962 1828.4496,-334.0316"/>
 </g>
-<!-- Node1&#45;&gt;Node14 -->
-<g id="edge130" class="edge">
-<title>Node1&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1872.552,-557.0511C2021.457,-553.3507 2477.324,-535.8106 2845,-456 3084.5955,-403.9915 3376,-551.6752 3376,-306.5 3376,-306.5 3376,-306.5 3376,-189 3376,-123.5702 3150.32,-87.0469 3064.5467,-75.5285"/>
-<polygon fill="#191970" stroke="#191970" points="3064.9416,-72.0503 3054.5708,-74.2174 3064.0295,-78.9907 3064.9416,-72.0503"/>
+<!-- Node33 -->
+<g id="node30" class="node">
+<title>Node33</title>
+<g id="a_node30"><a xlink:href="string_8h.html" target="_top" xlink:title="Runtime String container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="2518,-235.5 2518,-265.5 2644,-265.5 2644,-235.5 2518,-235.5"/>
+<text text-anchor="start" x="2526" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="2581" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/string.h</text>
+</a>
 </g>
-<!-- Node16 -->
-<g id="node17" class="node">
-<title>Node16</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1210.5,-235.5 1210.5,-254.5 1257.5,-254.5 1257.5,-235.5 1210.5,-235.5"/>
-<text text-anchor="middle" x="1234" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">vector</text>
 </g>
-<!-- Node1&#45;&gt;Node16 -->
-<g id="edge131" class="edge">
-<title>Node1&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1773.2398,-556.5629C1617.4501,-551.835 1145.5982,-535.7003 1081,-512 1041.9455,-497.6714 1036.0843,-484.7316 1006,-456 949.8917,-402.4144 898.6989,-350.0708 949,-291 965.1491,-272.0354 1128.0775,-254.7856 1200.0863,-248.0234"/>
-<polygon fill="#191970" stroke="#191970" points="1200.7903,-251.4732 1210.425,-247.0661 1200.1448,-244.5031 1200.7903,-251.4732"/>
+<!-- Node1&#45;&gt;Node33 -->
+<g id="edge175" class="edge">
+<title>Node1&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M1562.9466,-832.7694C1617.9413,-826.1166 1710.5055,-812.9131 1788,-792 2047.6001,-721.9427 2116.5053,-701.3435 2350,-568 2454.3963,-508.3817 2488.4791,-495.1189 2562,-400 2590.4717,-363.1643 2606.7903,-347.9001 2599,-302 2597.4902,-293.1043 2594.6114,-283.6975 2591.5916,-275.3919"/>
+<polygon fill="#191970" stroke="#191970" points="2594.7966,-273.976 2587.9207,-265.9154 2588.2692,-276.5046 2594.7966,-273.976"/>
 </g>
-<!-- Node21 -->
-<g id="node22" class="node">
-<title>Node21</title>
-<g id="a_node22"><a xlink:href="runtime_2memory_8h.html" target="_top" xlink:title="Runtime memory management. ">
-<polygon fill="#ffffff" stroke="#000000" points="1770.5,-179.5 1770.5,-198.5 1899.5,-198.5 1899.5,-179.5 1770.5,-179.5"/>
-<text text-anchor="middle" x="1835" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/memory.h</text>
+<!-- Node36 -->
+<g id="node33" class="node">
+<title>Node36</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="3262.5,-179.5 3262.5,-198.5 3355.5,-198.5 3355.5,-179.5 3262.5,-179.5"/>
+<text text-anchor="middle" x="3309" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_map</text>
+</g>
+<!-- Node1&#45;&gt;Node36 -->
+<g id="edge177" class="edge">
+<title>Node1&#45;&gt;Node36</title>
+<path fill="none" stroke="#191970" d="M1562.5859,-836.7857C1877.602,-828.5954 3650,-780.1095 3650,-726 3650,-726 3650,-726 3650,-614 3650,-412.648 3414.2714,-252.3221 3334.8008,-203.9929"/>
+<polygon fill="#191970" stroke="#191970" points="3336.286,-200.8024 3325.9102,-198.6591 3332.6848,-206.8051 3336.286,-200.8024"/>
+</g>
+<!-- Node42 -->
+<g id="node35" class="node">
+<title>Node42</title>
+<g id="a_node35"><a xlink:href="map_8h.html" target="_top" xlink:title="Runtime Map container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="2446,-302.5 2446,-332.5 2572,-332.5 2572,-302.5 2446,-302.5"/>
+<text text-anchor="start" x="2454" y="-320.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="2509" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/map.h</text>
 </a>
 </g>
 </g>
-<!-- Node1&#45;&gt;Node21 -->
-<g id="edge126" class="edge">
-<title>Node1&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1842.4848,-548.4557C1860.8316,-539.3714 1889.0026,-525.1711 1913,-512 1979.8884,-475.288 1989.1273,-450.6152 2061,-425 2110.3516,-407.4113 2259.8987,-428.7748 2294,-389 2406.6615,-257.5948 2066.3434,-209.6069 1909.5754,-194.7705"/>
-<polygon fill="#191970" stroke="#191970" points="1909.8014,-191.2766 1899.5215,-193.8407 1909.1567,-198.2468 1909.8014,-191.2766"/>
+<!-- Node1&#45;&gt;Node42 -->
+<g id="edge174" class="edge">
+<title>Node1&#45;&gt;Node42</title>
+<path fill="none" stroke="#191970" d="M1562.8338,-833.1561C1611.0065,-827.2324 1686.8837,-815.0182 1749,-792 1985.6453,-704.307 2032.1346,-650.661 2243,-512 2331.0107,-454.1258 2346.1857,-430.0837 2432,-369 2446.5096,-358.6719 2462.875,-347.6421 2476.7754,-338.4505"/>
+<polygon fill="#191970" stroke="#191970" points="2479.081,-341.1231 2485.5097,-332.7015 2475.2324,-335.276 2479.081,-341.1231"/>
 </g>
-<!-- Node25 -->
-<g id="node26" class="node">
-<title>Node25</title>
-<g id="a_node26"><a xlink:href="structural__hash_8h.html" target="_top" xlink:title="tvm/node/structural\l_hash.h">
-<polygon fill="#ffffff" stroke="#000000" points="1053.5,-425.5 1053.5,-455.5 1166.5,-455.5 1166.5,-425.5 1053.5,-425.5"/>
-<text text-anchor="start" x="1061.5" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
-<text text-anchor="middle" x="1110" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_hash.h</text>
+<!-- Node49 -->
+<g id="node39" class="node">
+<title>Node49</title>
+<g id="a_node39"><a xlink:href="ir_2type_8h.html" target="_top" xlink:title="IR/AST nodes for the unified type system in TVM. ">
+<polygon fill="#ffffff" stroke="#000000" points="1779,-604.5 1779,-623.5 1859,-623.5 1859,-604.5 1779,-604.5"/>
+<text text-anchor="middle" x="1819" y="-611.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/type.h</text>
 </a>
 </g>
 </g>
-<!-- Node1&#45;&gt;Node25 -->
-<g id="edge124" class="edge">
-<title>Node1&#45;&gt;Node25</title>
-<path fill="none" stroke="#191970" d="M1773.4375,-556.7248C1612.7735,-552.353 1115.3395,-536.8543 1092,-512 1080.0994,-499.327 1085.8919,-479.9769 1093.9538,-464.6012"/>
-<polygon fill="#191970" stroke="#191970" points="1097.1606,-466.0468 1099.1289,-455.6364 1091.0982,-462.5471 1097.1606,-466.0468"/>
+<!-- Node1&#45;&gt;Node49 -->
+<g id="edge159" class="edge">
+<title>Node1&#45;&gt;Node49</title>
+<path fill="none" stroke="#191970" d="M1528.0625,-828.375C1575.5176,-793.4081 1739.0797,-672.8886 1797.8015,-629.6199"/>
+<polygon fill="#191970" stroke="#191970" points="1800.0118,-632.3389 1805.9862,-623.5891 1795.8594,-626.7035 1800.0118,-632.3389"/>
 </g>
-<!-- Node43 -->
+<!-- Node51 -->
 <g id="node41" class="node">
-<title>Node43</title>
-<g id="a_node41"><a xlink:href="repr__printer_8h.html" target="_top" xlink:title="Printer class to print repr string of each AST/IR nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="1772.5,-492.5 1772.5,-511.5 1903.5,-511.5 1903.5,-492.5 1772.5,-492.5"/>
-<text text-anchor="middle" x="1838" y="-499.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/repr_printer.h</text>
+<title>Node51</title>
+<g id="a_node41"><a xlink:href="ir_2function_8h.html" target="_top" xlink:title="Function nodes. ">
+<polygon fill="#ffffff" stroke="#000000" points="2707,-772.5 2707,-791.5 2805,-791.5 2805,-772.5 2707,-772.5"/>
+<text text-anchor="middle" x="2756" y="-779.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/function.h</text>
 </a>
 </g>
 </g>
-<!-- Node1&#45;&gt;Node43 -->
-<g id="edge120" class="edge">
-<title>Node1&#45;&gt;Node43</title>
-<path fill="none" stroke="#191970" d="M1825.6128,-548.2455C1827.5916,-540.8579 1830.3744,-530.4689 1832.8012,-521.4087"/>
-<polygon fill="#191970" stroke="#191970" points="1836.2105,-522.2078 1835.4171,-511.6427 1829.4489,-520.3966 1836.2105,-522.2078"/>
+<!-- Node1&#45;&gt;Node51 -->
+<g id="edge142" class="edge">
+<title>Node1&#45;&gt;Node51</title>
+<path fill="none" stroke="#191970" d="M1562.5834,-836.969C1751.6742,-832.7467 2451.0877,-815.908 2696.5178,-791.8657"/>
+<polygon fill="#191970" stroke="#191970" points="2697.2124,-795.3134 2706.8097,-790.8253 2696.5083,-788.3489 2697.2124,-795.3134"/>
+</g>
+<!-- Node54 -->
+<g id="node43" class="node">
+<title>Node54</title>
+<g id="a_node43"><a xlink:href="source__map_8h.html" target="_top" xlink:title="A map from source names to source code. ">
+<polygon fill="#ffffff" stroke="#000000" points="1547.5,-604.5 1547.5,-623.5 1688.5,-623.5 1688.5,-604.5 1547.5,-604.5"/>
+<text text-anchor="middle" x="1618" y="-611.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/parser/source_map.h</text>
+</a>
+</g>
+</g>
+<!-- Node1&#45;&gt;Node54 -->
+<g id="edge160" class="edge">
+<title>Node1&#45;&gt;Node54</title>
+<path fill="none" stroke="#191970" d="M1467.3751,-836.5724C1358.5589,-832.8202 1099.0525,-820.9046 1073,-792 1033.7221,-748.4222 1033.2992,-703.1929 1073,-660 1103.7482,-626.5472 1393.1861,-617.4032 1537.2901,-614.9186"/>
+<polygon fill="#191970" stroke="#191970" points="1537.4682,-618.4162 1547.4088,-614.7509 1537.3522,-611.4172 1537.4682,-618.4162"/>
+</g>
+<!-- Node57 -->
+<g id="node46" class="node">
+<title>Node57</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="897,-772.5 897,-791.5 983,-791.5 983,-772.5 897,-772.5"/>
+<text text-anchor="middle" x="940" y="-779.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_set</text>
+</g>
+<!-- Node1&#45;&gt;Node57 -->
+<g id="edge178" class="edge">
+<title>Node1&#45;&gt;Node57</title>
+<path fill="none" stroke="#191970" d="M1467.3586,-835.1303C1375.9096,-829.3206 1169.5295,-814.7528 997,-792 995.7393,-791.8337 994.464,-791.6595 993.1786,-791.4784"/>
+<polygon fill="#191970" stroke="#191970" points="993.6099,-788.004 983.2004,-789.977 992.5683,-794.9261 993.6099,-788.004"/>
 </g>
 <!-- Node2&#45;&gt;Node3 -->
 <g id="edge3" class="edge">
 <title>Node2&#45;&gt;Node3</title>
-<path fill="none" stroke="#191970" d="M1221.5464,-493.6207C1226.4324,-493.0379 1231.2963,-492.4888 1236,-492 1449.6682,-469.795 1514.3579,-524.389 1718,-456 1742.0743,-447.9151 1743.9186,-437.544 1766,-425 1786.0232,-413.6252 1808.8979,-402.1416 1828.0865,-392.9138"/>
-<polygon fill="#191970" stroke="#191970" points="1829.6628,-396.0397 1837.1816,-388.5753 1826.649,-389.7217 1829.6628,-396.0397"/>
-</g>
-<!-- Node2&#45;&gt;Node6 -->
-<g id="edge85" class="edge">
-<title>Node2&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M1100.3818,-492.7017C1070.1747,-485.9928 1034.1814,-474.7049 1006,-456 973.9011,-434.6949 964.5211,-425.0753 951,-389 918.0477,-301.081 903.8034,-241.3542 974,-179 1001.9345,-154.1864 1236.1311,-140.9046 1355.9583,-135.6775"/>
-<polygon fill="#191970" stroke="#191970" points="1356.2955,-139.1663 1366.1363,-135.2409 1355.9954,-132.1728 1356.2955,-139.1663"/>
-</g>
-<!-- Node2&#45;&gt;Node7 -->
-<g id="edge81" class="edge">
-<title>Node2&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M1221.7077,-498.1448C1334.5105,-490.8577 1584.0536,-474.1562 1794,-456 1931.5095,-444.1081 1965.3261,-434.8064 2103,-425 2197.5117,-418.268 2870.2176,-431.3052 2955,-389 2989.4726,-371.7987 2997.0187,-358.273 3010,-322 3037.1095,-246.2494 2972.7307,-211.2004 2899,-179 2797.928,-134.8588 2513.5338,-155.8973 2404,-143 2275.7429,-127.8981 2127.672,-101.9209 2040.715,-85.7833"/>
-<polygon fill="#191970" stroke="#191970" points="2041.1503,-82.3043 2030.6785,-83.914 2039.8686,-89.186 2041.1503,-82.3043"/>
+<path fill="none" stroke="#191970" d="M1810.037,-723.8901C1961.4763,-715.2628 2533.7195,-682.663 2706.3803,-672.8268"/>
+<polygon fill="#191970" stroke="#191970" points="2706.6699,-676.316 2716.4546,-672.2528 2706.2717,-669.3274 2706.6699,-676.316"/>
 </g>
-<!-- Node2&#45;&gt;Node12 -->
-<g id="edge117" class="edge">
-<title>Node2&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M1100.2949,-496.328C1053.0194,-490.3829 986.4956,-478.6153 932,-456 697.1769,-358.5499 462.5692,-148.8034 397.6098,-88.2084"/>
-<polygon fill="#191970" stroke="#191970" points="399.9061,-85.5635 390.2191,-81.2725 395.1158,-90.6678 399.9061,-85.5635"/>
+<!-- Node2&#45;&gt;Node5 -->
+<g id="edge132" class="edge">
+<title>Node2&#45;&gt;Node5</title>
+<path fill="none" stroke="#191970" d="M1735.7767,-718.5563C1685.1254,-706.6812 1594.3612,-678.9604 1538,-624 1512.4409,-599.0761 1510.0683,-583.6474 1512,-548 1512.4662,-539.3971 1513.4193,-529.9288 1514.3696,-521.8301"/>
+<polygon fill="#191970" stroke="#191970" points="1517.8502,-522.2023 1515.6176,-511.8453 1510.9043,-521.334 1517.8502,-522.2023"/>
 </g>
-<!-- Node2&#45;&gt;Node13 -->
-<g id="edge118" class="edge">
-<title>Node2&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1100.3292,-493.8297C1050.6551,-486.0617 985.3783,-473.0399 965,-456 852.9145,-362.2766 874.6436,-155.4668 884.5985,-91.0492"/>
-<polygon fill="#191970" stroke="#191970" points="888.0753,-91.4747 886.2327,-81.0413 881.1668,-90.3465 888.0753,-91.4747"/>
+<!-- Node2&#45;&gt;Node10 -->
+<g id="edge139" class="edge">
+<title>Node2&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1735.9625,-724.018C1579.3484,-715.3538 971.1394,-678.7744 788,-624 680.0059,-591.7005 625.44,-565.3299 598,-456 555.8051,-287.8821 636.3363,-177.1286 801,-123 882.5168,-96.2036 1128.2566,-83.8604 1250.0387,-79.2651"/>
+<polygon fill="#191970" stroke="#191970" points="1250.5003,-82.7505 1260.364,-78.8829 1250.2413,-75.7553 1250.5003,-82.7505"/>
 </g>
 <!-- Node2&#45;&gt;Node16 -->
-<g id="edge119" class="edge">
+<g id="edge140" class="edge">
 <title>Node2&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1117.811,-492.4531C1094.1872,-485.4945 1065.6212,-474.0509 1045,-456 984.388,-402.9428 918.1587,-352.6553 970,-291 999.0711,-256.4254 1135.9444,-247.8356 1200.3451,-245.7032"/>
-<polygon fill="#191970" stroke="#191970" points="1200.5044,-249.2001 1210.3957,-245.4038 1200.2959,-242.2033 1200.5044,-249.2001"/>
+<path fill="none" stroke="#191970" d="M1810.1315,-725.3892C1970.744,-722.5863 2606.4584,-709.794 2804,-680 2901.6394,-665.2737 2924.4077,-652.3305 3019,-624 3154.1428,-583.5245 3214.8422,-610.3307 3316,-512 3410.6347,-420.0101 3448.5538,-364.4793 3423,-235 3406.5989,-151.897 3398.2076,-121.0656 3333,-67 3312.6571,-50.1331 3285.8875,-36.6198 3265.3145,-27.6747"/>
+<polygon fill="#191970" stroke="#191970" points="3266.6308,-24.4314 3256.0565,-23.7734 3263.9125,-30.8821 3266.6308,-24.4314"/>
 </g>
 <!-- Node2&#45;&gt;Node21 -->
-<g id="edge83" class="edge">
+<g id="edge137" class="edge">
 <title>Node2&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1221.5587,-493.7335C1226.442,-493.1254 1231.3019,-492.5397 1236,-492 1318.7299,-482.4955 1537.5094,-498.7043 1609,-456 1710.4857,-395.3784 1671.0528,-316.1628 1757,-235 1770.7251,-222.039 1788.7355,-211.1162 1803.9915,-203.1712"/>
-<polygon fill="#191970" stroke="#191970" points="1805.7017,-206.2291 1813.0716,-198.6175 1802.5636,-199.9718 1805.7017,-206.2291"/>
-</g>
-<!-- Node2&#45;&gt;Node24 -->
-<g id="edge82" class="edge">
-<title>Node2&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1221.5499,-493.6538C1226.4352,-493.0636 1231.2979,-492.5038 1236,-492 1334.4272,-481.455 1590.1864,-497.6248 1680,-456 1696.6139,-448.3002 1778.1196,-361.4836 1813.0547,-323.7919"/>
-<polygon fill="#191970" stroke="#191970" points="1815.9129,-325.8562 1820.1338,-316.1384 1810.7741,-321.103 1815.9129,-325.8562"/>
-</g>
-<!-- Node2&#45;&gt;Node25 -->
-<g id="edge42" class="edge">
-<title>Node2&#45;&gt;Node25</title>
-<path fill="none" stroke="#191970" d="M1153.0313,-492.3906C1146.5764,-484.6068 1137.2168,-473.3202 1128.8739,-463.2597"/>
-<polygon fill="#191970" stroke="#191970" points="1131.5536,-461.008 1122.476,-455.5446 1126.1652,-465.4764 1131.5536,-461.008"/>
-</g>
-<!-- Node26 -->
-<g id="node27" class="node">
-<title>Node26</title>
-<g id="a_node27"><a xlink:href="ndarray_8h.html" target="_top" xlink:title="A device&#45;independent managed NDArray abstraction. ">
-<polygon fill="#ffffff" stroke="#000000" points="2159.5,-364 2159.5,-383 2284.5,-383 2284.5,-364 2159.5,-364"/>
-<text text-anchor="middle" x="2222" y="-371" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/ndarray.h</text>
-</a>
+<path fill="none" stroke="#191970" d="M1784.9757,-716.3625C1823.5293,-684.4156 1944.0565,-577.1182 1981,-456 1994.8549,-410.577 1931.1802,-423.6516 1890,-400 1859.8296,-382.6718 1828.9471,-357.3071 1808.5564,-339.2838"/>
+<polygon fill="#191970" stroke="#191970" points="1810.801,-336.5956 1801.017,-332.5305 1806.1305,-341.8097 1810.801,-336.5956"/>
 </g>
+<!-- Node2&#45;&gt;Node33 -->
+<g id="edge138" class="edge">
+<title>Node2&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M1810.1922,-720.858C1901.5585,-707.0694 2142.4243,-663.6443 2319,-568 2388.7352,-530.2271 2541.3026,-422.7075 2581,-333 2588.9401,-315.0571 2588.3823,-292.6929 2586.1478,-275.7775"/>
+<polygon fill="#191970" stroke="#191970" points="2589.5472,-274.8703 2584.5097,-265.5497 2582.6353,-275.9774 2589.5472,-274.8703"/>
 </g>
-<!-- Node2&#45;&gt;Node26 -->
-<g id="edge84" class="edge">
-<title>Node2&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1221.8291,-501.0741C1352.9642,-498.4675 1658.3176,-488.9464 1756,-456 1780.0638,-447.8838 1780.1268,-433.6607 1804,-425 1835.6607,-413.5142 2038.2698,-391.8847 2149.3577,-380.6624"/>
-<polygon fill="#191970" stroke="#191970" points="2149.8871,-384.1268 2159.4861,-379.6424 2149.1857,-377.1621 2149.8871,-384.1268"/>
+<!-- Node2&#45;&gt;Node49 -->
+<g id="edge131" class="edge">
+<title>Node2&#45;&gt;Node49</title>
+<path fill="none" stroke="#191970" d="M1776.1331,-716.4891C1780.4652,-703.5616 1788.6943,-679.786 1797,-660 1800.8001,-650.9473 1805.4367,-641.1048 1809.4717,-632.8559"/>
+<polygon fill="#191970" stroke="#191970" points="1812.6835,-634.2575 1813.9986,-623.7446 1806.4146,-631.1428 1812.6835,-634.2575"/>
 </g>
-<!-- Node37 -->
-<g id="node36" class="node">
-<title>Node37</title>
-<g id="a_node36"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
-<polygon fill="#ffffff" stroke="#000000" points="1484,-425.5 1484,-455.5 1600,-455.5 1600,-425.5 1484,-425.5"/>
-<text text-anchor="start" x="1492" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/packed</text>
-<text text-anchor="middle" x="1542" y="-432.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_func.h</text>
+<!-- Node50 -->
+<g id="node40" class="node">
+<title>Node50</title>
+<g id="a_node40"><a xlink:href="runtime_2container_2adt_8h.html" target="_top" xlink:title="Runtime ADT container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="712,-235.5 712,-265.5 838,-265.5 838,-235.5 712,-235.5"/>
+<text text-anchor="start" x="720" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="775" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/adt.h</text>
 </a>
 </g>
 </g>
-<!-- Node2&#45;&gt;Node37 -->
-<g id="edge86" class="edge">
-<title>Node2&#45;&gt;Node37</title>
-<path fill="none" stroke="#191970" d="M1220.1132,-492.4581C1288.7845,-481.3734 1401.8674,-463.1198 1474.0878,-451.4622"/>
-<polygon fill="#191970" stroke="#191970" points="1474.6839,-454.9114 1483.9983,-449.8625 1473.5684,-448.0008 1474.6839,-454.9114"/>
+<!-- Node2&#45;&gt;Node50 -->
+<g id="edge133" class="edge">
+<title>Node2&#45;&gt;Node50</title>
+<path fill="none" stroke="#191970" d="M1735.6649,-723.9695C1612.6587,-716.299 1210.3909,-683.3741 902,-568 784.1089,-523.895 710.9689,-520.3198 674,-400 658.2012,-348.5809 709.0503,-298.8718 744.2381,-271.7178"/>
+<polygon fill="#191970" stroke="#191970" points="746.454,-274.431 752.3567,-265.6327 742.2557,-268.8297 746.454,-274.431"/>
 </g>
 <!-- Node4 -->
 <g id="node5" class="node">
 <title>Node4</title>
-<g id="a_node5"><a xlink:href="functor_8h.html" target="_top" xlink:title="Defines the Functor data structures. ">
-<polygon fill="#ffffff" stroke="#000000" points="1290.5,-297 1290.5,-316 1399.5,-316 1399.5,-297 1290.5,-297"/>
-<text text-anchor="middle" x="1345" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/functor.h</text>
+<g id="a_node5"><a xlink:href="ir_2span_8h.html" target="_top" xlink:title="Span information for debugging purposes. ">
+<polygon fill="#ffffff" stroke="#000000" points="1709.5,-548.5 1709.5,-567.5 1790.5,-567.5 1790.5,-548.5 1709.5,-548.5"/>
+<text text-anchor="middle" x="1750" y="-555.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/ir/span.h</text>
 </a>
 </g>
 </g>
 <!-- Node3&#45;&gt;Node4 -->
 <g id="edge4" class="edge">
 <title>Node3&#45;&gt;Node4</title>
-<path fill="none" stroke="#191970" d="M1813.317,-366.2662C1715.0396,-353.7241 1513.6163,-328.0187 1409.8961,-314.782"/>
-<polygon fill="#191970" stroke="#191970" points="1410.2296,-311.2962 1399.8669,-313.5021 1409.3434,-318.2399 1410.2296,-311.2962"/>
+<path fill="none" stroke="#191970" d="M2716.3121,-669.4193C2536.8055,-666.6324 1807.9225,-653.5286 1770,-624 1756.1486,-613.2145 1751.6032,-593.1328 1750.2475,-577.9419"/>
+<polygon fill="#191970" stroke="#191970" points="1753.7215,-577.3283 1749.7313,-567.5136 1746.7301,-577.6745 1753.7215,-577.3283"/>
 </g>
-<!-- Node3&#45;&gt;Node12 -->
-<g id="edge41" class="edge">
-<title>Node3&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M1813.2355,-359.5733C1706.5561,-333.5987 1468.7147,-276.6277 1267,-235 1044.8541,-189.1559 988.0626,-184.1529 765,-143 634.2249,-118.8732 478.5175,-89.8716 412.333,-77.5314"/>
-<polygon fill="#191970" stroke="#191970" points="412.6038,-74.0217 402.1316,-75.6291 411.3205,-80.903 412.6038,-74.0217"/>
+<!-- Node3&#45;&gt;Node5 -->
+<g id="edge124" class="edge">
+<title>Node3&#45;&gt;Node5</title>
+<path fill="none" stroke="#191970" d="M2716.3615,-663.913C2595.4471,-645.4263 2220.5688,-588.7125 1909,-548 1791.304,-532.6207 1653.4065,-516.9929 1576.534,-508.4943"/>
+<polygon fill="#191970" stroke="#191970" points="1576.8798,-505.0113 1566.5562,-507.3934 1576.112,-511.9691 1576.8798,-505.0113"/>
+</g>
+<!-- Node3&#45;&gt;Node10 -->
+<g id="edge126" class="edge">
+<title>Node3&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2755.1365,-660.3627C2750.1059,-610.7688 2719.787,-383.4948 2581,-302 2434.411,-215.9239 1991.1907,-223.6842 1823,-199 1684.4762,-178.6698 1648.5117,-178.1957 1513,-143 1456.0516,-128.2091 1391.726,-104.8157 1353.5884,-90.217"/>
+<polygon fill="#191970" stroke="#191970" points="1354.6052,-86.8579 1344.0156,-86.5264 1352.0871,-93.3894 1354.6052,-86.8579"/>
+</g>
+<!-- Node3&#45;&gt;Node16 -->
+<g id="edge129" class="edge">
+<title>Node3&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2795.5272,-664.8576C2840.3781,-658.3198 2915.3066,-645.2544 2977,-624 3276.7793,-520.7212 3440.5436,-496.5792 3550,-199 3570.2524,-143.9399 3588.001,-111.6956 3550,-67 3531.6887,-45.4628 3342.9332,-25.6339 3266.1853,-18.3984"/>
+<polygon fill="#191970" stroke="#191970" points="3266.41,-14.9043 3256.1286,-17.4622 3265.7611,-21.8742 3266.41,-14.9043"/>
 </g>
 <!-- Node17 -->
-<g id="node18" class="node">
+<g id="node15" class="node">
 <title>Node17</title>
-<g id="a_node18"><a xlink:href="array_8h.html" target="_top" xlink:title="Runtime Array container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="2516,-291.5 2516,-321.5 2642,-321.5 2642,-291.5 2516,-291.5"/>
-<text text-anchor="start" x="2524" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="2579" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/array.h</text>
-</a>
-</g>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2315.5,-6 2315.5,-25 2384.5,-25 2384.5,-6 2315.5,-6"/>
+<text text-anchor="middle" x="2350" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">type_traits</text>
 </g>
 <!-- Node3&#45;&gt;Node17 -->
-<g id="edge19" class="edge">
+<g id="edge130" class="edge">
 <title>Node3&#45;&gt;Node17</title>
-<path fill="none" stroke="#191970" d="M1926.5737,-364.2774C1941.9311,-361.9901 1958.5775,-359.7076 1974,-358 2193.2412,-333.7248 2249.6639,-345.4022 2469,-322 2480.9107,-320.7292 2493.5168,-319.1626 2505.7798,-317.5187"/>
-<polygon fill="#191970" stroke="#191970" points="2506.5436,-320.9469 2515.9772,-316.1243 2505.5952,-314.0114 2506.5436,-320.9469"/>
+<path fill="none" stroke="#191970" d="M2767.8419,-660.4312C2777.9635,-651.7668 2792.3757,-638.2154 2802,-624 2817.1809,-601.5773 2816.3093,-593.2849 2826,-568 2890.6573,-399.2978 3013.8766,-345.6081 2944,-179 2916.5288,-113.4998 2894.2105,-97.3637 2830,-67 2753.4521,-30.8021 2498.7059,-19.662 2394.7538,-16.5745"/>
+<polygon fill="#191970" stroke="#191970" points="2394.6863,-13.0713 2384.5904,-16.2842 2394.4864,-20.0684 2394.6863,-13.0713"/>
 </g>
-<!-- Node3&#45;&gt;Node24 -->
-<g id="edge36" class="edge">
-<title>Node3&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1860.6965,-358.2967C1854.6186,-348.3645 1846.6466,-335.3371 1840.2181,-324.8321"/>
-<polygon fill="#191970" stroke="#191970" points="1843.0522,-322.7578 1834.8471,-316.055 1837.0814,-326.4115 1843.0522,-322.7578"/>
+<!-- Node22 -->
+<g id="node20" class="node">
+<title>Node22</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2762,-123.5 2762,-142.5 2826,-142.5 2826,-123.5 2762,-123.5"/>
+<text text-anchor="middle" x="2794" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">algorithm</text>
 </g>
-<!-- Node5 -->
-<g id="node6" class="node">
-<title>Node5</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2275.5,-179.5 2275.5,-198.5 2364.5,-198.5 2364.5,-179.5 2275.5,-179.5"/>
-<text text-anchor="middle" x="2320" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dmlc/logging.h</text>
+<!-- Node3&#45;&gt;Node22 -->
+<g id="edge127" class="edge">
+<title>Node3&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M2795.6957,-666.1876C2835.4295,-661.0662 2897.3435,-649.4633 2945,-624 3184.8646,-495.8381 3549.42,-378.8744 3365,-179 3347.1042,-159.6046 2966.2578,-140.7452 2836.7035,-134.8694"/>
+<polygon fill="#191970" stroke="#191970" points="2836.534,-131.3584 2826.3867,-134.405 2836.2191,-138.3513 2836.534,-131.3584"/>
+</g>
+<!-- Node3&#45;&gt;Node33 -->
+<g id="edge125" class="edge">
+<title>Node3&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M2761.8623,-660.4588C2789.5695,-614.0363 2902.0425,-409.6592 2804,-302 2783.9987,-280.0368 2711.2875,-266.161 2654.2624,-258.418"/>
+<polygon fill="#191970" stroke="#191970" points="2654.5468,-254.9253 2644.1753,-257.0892 2653.6325,-261.8653 2654.5468,-254.9253"/>
+</g>
+<!-- Node45 -->
+<g id="node36" class="node">
+<title>Node45</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="3122,-308 3122,-327 3166,-327 3166,-308 3122,-308"/>
+<text text-anchor="middle" x="3144" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">limits</text>
+</g>
+<!-- Node3&#45;&gt;Node45 -->
+<g id="edge128" class="edge">
+<title>Node3&#45;&gt;Node45</title>
+<path fill="none" stroke="#191970" d="M2795.7163,-666.0271C2830.5498,-660.9349 2881.2343,-649.4808 2918,-624 3031.6019,-545.2672 3110.3812,-390.7184 3135.4505,-336.6923"/>
+<polygon fill="#191970" stroke="#191970" points="3138.7782,-337.8304 3139.7426,-327.2796 3132.4091,-334.9261 3138.7782,-337.8304"/>
+</g>
+<!-- Node3&#45;&gt;Node49 -->
+<g id="edge117" class="edge">
+<title>Node3&#45;&gt;Node49</title>
+<path fill="none" stroke="#191970" d="M2716.3368,-667.6295C2566.7061,-658.6868 2036.5006,-626.999 1869.6538,-617.0273"/>
+<polygon fill="#191970" stroke="#191970" points="1869.3883,-613.5053 1859.1972,-616.4024 1868.9706,-620.4928 1869.3883,-613.5053"/>
 </g>
 <!-- Node4&#45;&gt;Node5 -->
 <g id="edge5" class="edge">
 <title>Node4&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M1391.7505,-296.9669C1467.9404,-281.7515 1623.7719,-251.968 1757,-235 1942.5667,-211.3662 2164.172,-197.4132 2265.0841,-191.8357"/>
-<polygon fill="#191970" stroke="#191970" points="2265.5434,-195.3159 2275.3373,-191.275 2265.161,-188.3264 2265.5434,-195.3159"/>
+<path fill="none" stroke="#191970" d="M1710.2106,-548.4369C1670.502,-538.8932 1609.3896,-524.2052 1566.5545,-513.9101"/>
+<polygon fill="#191970" stroke="#191970" points="1567.1663,-510.4575 1556.6253,-511.5237 1565.5305,-517.2637 1567.1663,-510.4575"/>
+</g>
+<!-- Node4&#45;&gt;Node10 -->
+<g id="edge115" class="edge">
+<title>Node4&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1709.4837,-550.4364C1704.6085,-549.5846 1699.6955,-548.7533 1695,-548 1402.1352,-501.0158 1314.6346,-552.0288 1034,-456 873.5111,-401.0832 787.9961,-412.7931 703,-266 645.3796,-166.4863 1076.9254,-105.0377 1250.1677,-84.5968"/>
+<polygon fill="#191970" stroke="#191970" points="1250.6811,-88.0608 1260.2075,-83.4241 1249.8689,-81.108 1250.6811,-88.0608"/>
+</g>
+<!-- Node4&#45;&gt;Node16 -->
+<g id="edge116" class="edge">
+<title>Node4&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1790.7592,-553.1613C1948.2619,-533.46 2530.2436,-451.348 2971,-266 3030.9176,-240.8033 3053.3582,-243.2182 3101,-199 3151.4502,-152.1753 3139.24,-120.9385 3182,-67 3192.0639,-54.3051 3205.0019,-41.4809 3215.4547,-31.8052"/>
+<polygon fill="#191970" stroke="#191970" points="3217.8198,-34.3852 3222.8748,-25.0741 3213.1166,-29.2006 3217.8198,-34.3852"/>
 </g>
-<!-- Node4&#45;&gt;Node6 -->
+<!-- Node5&#45;&gt;Node6 -->
 <g id="edge6" class="edge">
-<title>Node4&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M1349.4703,-296.9248C1362.3846,-269.2625 1399.9206,-188.8614 1417.1576,-151.9403"/>
-<polygon fill="#191970" stroke="#191970" points="1420.3847,-153.3013 1421.4437,-142.7595 1414.0419,-150.3401 1420.3847,-153.3013"/>
+<title>Node5&#45;&gt;Node6</title>
+<path fill="none" stroke="#191970" d="M1501.4974,-492.2455C1487.842,-483.6534 1467.7343,-471.0014 1452.0011,-461.1018"/>
+<polygon fill="#191970" stroke="#191970" points="1453.6529,-458.006 1443.325,-455.6427 1449.9249,-463.9307 1453.6529,-458.006"/>
 </g>
-<!-- Node4&#45;&gt;Node13 -->
-<g id="edge16" class="edge">
-<title>Node4&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1336.2505,-296.586C1322.6484,-281.6438 1295.0644,-253.1716 1267,-235 1153.3933,-161.4402 1001.1295,-107.4279 929.0831,-84.1583"/>
-<polygon fill="#191970" stroke="#191970" points="930.0527,-80.7938 919.4618,-81.08 927.9196,-87.4609 930.0527,-80.7938"/>
+<!-- Node7 -->
+<g id="node8" class="node">
+<title>Node7</title>
+<g id="a_node8"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
+<polygon fill="#ffffff" stroke="#000000" points="1608.5,-369.5 1608.5,-399.5 1721.5,-399.5 1721.5,-369.5 1608.5,-369.5"/>
+<text text-anchor="start" x="1616.5" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
+<text text-anchor="middle" x="1665" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
+</a>
 </g>
-<!-- Node4&#45;&gt;Node14 -->
-<g id="edge17" class="edge">
-<title>Node4&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1399.6244,-304.3757C1566.9151,-297.6699 2065.1106,-276.1417 2135,-255 2199.796,-235.3991 2202.979,-201.0008 2267,-179 2337.3908,-154.8102 2860.9341,-91.7394 2998.8057,-75.4074"/>
-<polygon fill="#191970" stroke="#191970" points="2999.622,-78.8354 3009.1417,-74.1851 2998.7999,-71.8838 2999.622,-78.8354"/>
 </g>
-<!-- Node4&#45;&gt;Node16 -->
-<g id="edge18" class="edge">
-<title>Node4&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M1327.6563,-296.8906C1309.5727,-286.8713 1281.0141,-271.0483 1260.1835,-259.5071"/>
-<polygon fill="#191970" stroke="#191970" points="1261.6654,-256.3268 1251.222,-254.5419 1258.2729,-262.4498 1261.6654,-256.3268"/>
+<!-- Node5&#45;&gt;Node7 -->
+<g id="edge106" class="edge">
+<title>Node5&#45;&gt;Node7</title>
+<path fill="none" stroke="#191970" d="M1523.8459,-492.1109C1533.743,-478.32 1553.1593,-453.0859 1574,-436 1588.7864,-423.8777 1606.81,-413.0125 1622.904,-404.4221"/>
+<polygon fill="#191970" stroke="#191970" points="1624.8578,-407.3513 1632.1178,-399.6348 1621.6304,-401.1397 1624.8578,-407.3513"/>
 </g>
-<!-- Node6&#45;&gt;Node7 -->
-<g id="edge7" class="edge">
-<title>Node6&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M1485.5932,-126.213C1584.5735,-114.9402 1782.6716,-92.3791 1891.0021,-80.0414"/>
-<polygon fill="#191970" stroke="#191970" points="1891.6594,-83.4893 1901.1991,-78.8801 1890.8672,-76.5342 1891.6594,-83.4893"/>
+<!-- Node5&#45;&gt;Node10 -->
+<g id="edge110" class="edge">
+<title>Node5&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1467.3369,-495.5807C1387.742,-482.1371 1235.7987,-442.5738 1189,-333 1183.5884,-320.3295 1180.521,-312.8597 1189,-302 1229.5986,-250.0023 1292.4014,-317.9977 1333,-266 1355.8307,-236.759 1301.6238,-195.3216 1290,-143 1288.0722,-134.3227 1287.4671,-131.5204 1290,-123 1293.0326,-112.7984 1299.1244,-102.7475 1305.0309,-94.6555"/>
+<polygon fill="#191970" stroke="#191970" points="1307.823,-96.7667 1311.2139,-86.7291 1302.3036,-92.4613 1307.823,-96.7667"/>
 </g>
 <!-- Node11 -->
 <g id="node12" class="node">
 <title>Node11</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2431.5,-62 2431.5,-81 2556.5,-81 2556.5,-62 2431.5,-62"/>
-<text text-anchor="middle" x="2494" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/logging.h</text>
+<g id="a_node12"><a xlink:href="c__runtime__api_8h.html" target="_top" xlink:title="tvm/runtime/c_runtime\l_api.h">
+<polygon fill="#ffffff" stroke="#ff0000" points="239.5,-.5 239.5,-30.5 368.5,-30.5 368.5,-.5 239.5,-.5"/>
+<text text-anchor="start" x="247.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/c_runtime</text>
+<text text-anchor="middle" x="304" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_api.h</text>
+</a>
 </g>
-<!-- Node6&#45;&gt;Node11 -->
-<g id="edge11" class="edge">
-<title>Node6&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M1485.7088,-129.5617C1668.7832,-119.0195 2221.8671,-87.1706 2421.3092,-75.6858"/>
-<polygon fill="#191970" stroke="#191970" points="2421.6056,-79.1747 2431.3878,-75.1055 2421.2031,-72.1863 2421.6056,-79.1747"/>
 </g>
-<!-- Node6&#45;&gt;Node12 -->
-<g id="edge12" class="edge">
-<title>Node6&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M1366.3861,-129.495C1172.005,-118.0663 560.5766,-82.1171 412.5221,-73.4121"/>
-<polygon fill="#191970" stroke="#191970" points="412.4795,-69.9037 402.2912,-72.8106 412.0685,-76.8916 412.4795,-69.9037"/>
+<!-- Node5&#45;&gt;Node11 -->
+<g id="edge108" class="edge">
+<title>Node5&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M1467.4786,-500.8119C1218.3523,-494.1951 114,-456.9965 114,-317.5 114,-317.5 114,-317.5 114,-133 114,-76.133 176.3281,-45.8622 229.5875,-30.3764"/>
+<polygon fill="#191970" stroke="#191970" points="230.7078,-33.6979 239.4095,-27.6538 228.838,-26.9522 230.7078,-33.6979"/>
 </g>
-<!-- Node6&#45;&gt;Node13 -->
-<g id="edge13" class="edge">
-<title>Node6&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1366.369,-126.1834C1257.6152,-113.7515 1029.3761,-87.661 932.744,-76.6148"/>
-<polygon fill="#191970" stroke="#191970" points="933.0502,-73.1271 922.7174,-75.4686 932.2551,-80.0818 933.0502,-73.1271"/>
+<!-- Node5&#45;&gt;Node16 -->
+<g id="edge111" class="edge">
+<title>Node5&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1566.7806,-501.1321C1760.7596,-496.3395 2482.3988,-465.3148 3032,-266 3122.4003,-233.216 3173.3402,-231.8992 3210,-143 3224.893,-106.8849 3230.6221,-61.1834 3232.7684,-35.4615"/>
+<polygon fill="#191970" stroke="#191970" points="3236.2833,-35.3843 3233.5225,-25.1555 3229.302,-34.8734 3236.2833,-35.3843"/>
 </g>
-<!-- Node6&#45;&gt;Node14 -->
-<g id="edge14" class="edge">
-<title>Node6&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1485.5436,-130.7198C1748.2281,-120.6606 2799.2722,-80.4121 2999.0998,-72.7599"/>
-<polygon fill="#191970" stroke="#191970" points="2999.3961,-76.2512 3009.2549,-72.371 2999.1282,-69.2563 2999.3961,-76.2512"/>
+<!-- Node5&#45;&gt;Node17 -->
+<g id="edge112" class="edge">
+<title>Node5&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M1566.7007,-501.3368C1707.3551,-498.4437 2104.0829,-482.9456 2198,-400 2256.6021,-348.2439 2325.5363,-106.1436 2344.7825,-35.1278"/>
+<polygon fill="#191970" stroke="#191970" points="2348.2205,-35.8203 2347.4356,-25.2545 2341.4603,-34.0037 2348.2205,-35.8203"/>
 </g>
-<!-- Node15 -->
-<g id="node16" class="node">
-<title>Node15</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1521,-62 1521,-81 1571,-81 1571,-62 1521,-62"/>
-<text text-anchor="middle" x="1546" y="-69" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">atomic</text>
+<!-- Node5&#45;&gt;Node18 -->
+<g id="edge113" class="edge">
+<title>Node5&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1467.2244,-499.8498C1393.3496,-495.8059 1250.7426,-484.7155 1133,-456 899.6199,-399.0825 785.9395,-446.1225 627,-266 568.0576,-199.2019 575.2698,-113.9162 651,-67 687.8379,-44.1783 985.3147,-23.7581 1085.3312,-17.4782"/>
+<polygon fill="#191970" stroke="#191970" points="1085.692,-20.9627 1095.4555,-16.8487 1085.2575,-13.9761 1085.692,-20.9627"/>
 </g>
-<!-- Node6&#45;&gt;Node15 -->
-<g id="edge15" class="edge">
-<title>Node6&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M1444.75,-123.3906C1464.476,-113.2811 1495.7312,-97.2628 1518.3,-85.6962"/>
-<polygon fill="#191970" stroke="#191970" points="1520.0787,-88.7176 1527.3817,-81.0419 1516.886,-82.4881 1520.0787,-88.7176"/>
+<!-- Node5&#45;&gt;Node20 -->
+<g id="edge114" class="edge">
+<title>Node5&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1467.1867,-499.885C1383.3691,-495.5982 1218.3502,-483.8182 1168,-456 1076.6476,-405.5284 1055.4695,-367.2124 1030,-266 1025.2026,-246.9357 1028.1851,-224.4837 1031.6162,-208.7594"/>
+<polygon fill="#191970" stroke="#191970" points="1035.1143,-209.1884 1034.089,-198.6433 1028.3145,-207.5262 1035.1143,-209.1884"/>
 </g>
-<!-- Node8 -->
-<g id="node9" class="node">
-<title>Node8</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1826.5,-.5 1826.5,-19.5 1919.5,-19.5 1919.5,-.5 1826.5,-.5"/>
-<text text-anchor="middle" x="1873" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dlpack/dlpack.h</text>
+<!-- Node25 -->
+<g id="node23" class="node">
+<title>Node25</title>
+<g id="a_node23"><a xlink:href="runtime_2memory_8h.html" target="_top" xlink:title="Runtime memory management. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="1603.5,-123.5 1603.5,-142.5 1732.5,-142.5 1732.5,-123.5 1603.5,-123.5"/>
+<text text-anchor="middle" x="1668" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/memory.h</text>
+</a>
 </g>
-<!-- Node7&#45;&gt;Node8 -->
-<g id="edge8" class="edge">
-<title>Node7&#45;&gt;Node8</title>
-<path fill="none" stroke="#191970" d="M1943.0112,-56.2977C1928.8034,-46.9022 1910.6215,-34.8787 1896.3475,-25.4395"/>
-<polygon fill="#191970" stroke="#191970" points="1897.8845,-22.2598 1887.6128,-19.6633 1894.0233,-28.0986 1897.8845,-22.2598"/>
 </g>
-<!-- Node9 -->
-<g id="node10" class="node">
-<title>Node9</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1938,-.5 1938,-19.5 1994,-19.5 1994,-.5 1938,-.5"/>
-<text text-anchor="middle" x="1966" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">stddef.h</text>
+<!-- Node5&#45;&gt;Node25 -->
+<g id="edge109" class="edge">
+<title>Node5&#45;&gt;Node25</title>
+<path fill="none" stroke="#191970" d="M1521.0389,-492.1301C1541.3842,-442.4122 1632.7482,-219.145 1660.1849,-152.0978"/>
+<polygon fill="#191970" stroke="#191970" points="1663.4707,-153.3096 1664.0188,-142.7289 1656.9921,-150.6584 1663.4707,-153.3096"/>
 </g>
-<!-- Node7&#45;&gt;Node9 -->
-<g id="edge9" class="edge">
-<title>Node7&#45;&gt;Node9</title>
-<path fill="none" stroke="#191970" d="M1966,-56.2977C1966,-48.3834 1966,-38.6043 1966,-30.0759"/>
-<polygon fill="#191970" stroke="#191970" points="1969.5001,-29.8469 1966,-19.8469 1962.5001,-29.847 1969.5001,-29.8469"/>
+<!-- Node29 -->
+<g id="node26" class="node">
+<title>Node29</title>
+<g id="a_node26"><a xlink:href="structural__hash_8h.html" target="_top" xlink:title="tvm/node/structural\l_hash.h">
+<polygon fill="#ffffff" stroke="#ff0000" points="1286.5,-369.5 1286.5,-399.5 1399.5,-399.5 1399.5,-369.5 1286.5,-369.5"/>
+<text text-anchor="start" x="1294.5" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/structural</text>
+<text text-anchor="middle" x="1343" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_hash.h</text>
+</a>
 </g>
-<!-- Node10 -->
-<g id="node11" class="node">
-<title>Node10</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2012.5,-.5 2012.5,-19.5 2065.5,-19.5 2065.5,-.5 2012.5,-.5"/>
-<text text-anchor="middle" x="2039" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">stdint.h</text>
 </g>
-<!-- Node7&#45;&gt;Node10 -->
-<g id="edge10" class="edge">
-<title>Node7&#45;&gt;Node10</title>
-<path fill="none" stroke="#191970" d="M1984.0449,-56.2977C1994.8114,-47.2274 2008.4851,-35.7077 2019.4995,-26.4285"/>
-<polygon fill="#191970" stroke="#191970" points="2021.9191,-28.9667 2027.3118,-19.8469 2017.4089,-23.6132 2021.9191,-28.9667"/>
+<!-- Node5&#45;&gt;Node29 -->
+<g id="edge107" class="edge">
+<title>Node5&#45;&gt;Node29</title>
+<path fill="none" stroke="#191970" d="M1467.3545,-493.3993C1426.4738,-485.3116 1372.9033,-472.0624 1358,-456 1346.6153,-443.7299 1342.9822,-425.1097 1342.1341,-409.9471"/>
+<polygon fill="#191970" stroke="#191970" points="1345.628,-409.5891 1341.9377,-399.6577 1338.6293,-409.7227 1345.628,-409.5891"/>
 </g>
-<!-- Node17&#45;&gt;Node14 -->
-<g id="edge22" class="edge">
-<title>Node17&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2639.9751,-291.4613C2689.129,-278.9879 2751.7153,-262.178 2762,-255 2796.191,-231.1369 2785.2176,-205.988 2817,-179 2873.4864,-131.0346 2955.0536,-97.8888 2999.7306,-82.0813"/>
-<polygon fill="#191970" stroke="#191970" points="3000.9072,-85.3779 3009.2075,-78.7934 2998.6127,-78.7646 3000.9072,-85.3779"/>
+<!-- Node47 -->
+<g id="node37" class="node">
+<title>Node47</title>
+<g id="a_node37"><a xlink:href="repr__printer_8h.html" target="_top" xlink:title="Printer class to print repr string of each AST/IR nodes. ">
+<polygon fill="#ffffff" stroke="#000000" points="1583.5,-436.5 1583.5,-455.5 1714.5,-455.5 1714.5,-436.5 1583.5,-436.5"/>
+<text text-anchor="middle" x="1649" y="-443.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/repr_printer.h</text>
+</a>
 </g>
-<!-- Node17&#45;&gt;Node16 -->
-<g id="edge23" class="edge">
-<title>Node17&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2515.7251,-296.4508C2500.461,-294.3405 2484.1761,-292.3483 2469,-291 2226.0323,-269.4145 1440.4188,-249.8849 1267.7597,-245.7872"/>
-<polygon fill="#191970" stroke="#191970" points="1267.5881,-242.2823 1257.5082,-245.545 1267.4227,-249.2803 1267.5881,-242.2823"/>
 </g>
-<!-- Node18 -->
-<g id="node19" class="node">
-<title>Node18</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2826,-179.5 2826,-198.5 2890,-198.5 2890,-179.5 2826,-179.5"/>
-<text text-anchor="middle" x="2858" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">algorithm</text>
+<!-- Node5&#45;&gt;Node47 -->
+<g id="edge103" class="edge">
+<title>Node5&#45;&gt;Node47</title>
+<path fill="none" stroke="#191970" d="M1539.6916,-492.3733C1561.0711,-483.3032 1593.3057,-469.6279 1617.2362,-459.4755"/>
+<polygon fill="#191970" stroke="#191970" points="1618.7656,-462.6287 1626.6045,-455.5011 1616.0317,-456.1846 1618.7656,-462.6287"/>
+</g>
+<!-- Node6&#45;&gt;Node7 -->
+<g id="edge7" class="edge">
+<title>Node6&#45;&gt;Node7</title>
+<path fill="none" stroke="#191970" d="M1464.7712,-436.4581C1500.3546,-427.2244 1555.1077,-413.0164 1598.3696,-401.7902"/>
+<polygon fill="#191970" stroke="#191970" points="1599.4553,-405.1244 1608.2555,-399.2248 1597.697,-398.3488 1599.4553,-405.1244"/>
+</g>
+<!-- Node6&#45;&gt;Node10 -->
+<g id="edge80" class="edge">
+<title>Node6&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1426.5929,-436.1695C1422.1091,-405.7919 1407.0815,-310.8982 1384,-235 1368.2864,-183.3294 1342.455,-125.0396 1328.9148,-95.8224"/>
+<polygon fill="#191970" stroke="#191970" points="1332.0825,-94.3337 1324.6748,-86.7589 1325.7419,-97.2999 1332.0825,-94.3337"/>
+</g>
+<!-- Node6&#45;&gt;Node11 -->
+<g id="edge76" class="edge">
+<title>Node6&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M1367.2542,-438.8184C1187.5818,-416.8435 661.8671,-347.0586 511,-266 412.9919,-213.3418 395.5434,-180.9843 336,-87 326.6878,-72.3014 318.6906,-54.4354 313.0201,-40.2396"/>
+<polygon fill="#191970" stroke="#191970" points="316.1841,-38.717 309.3168,-30.6492 309.6541,-41.2387 316.1841,-38.717"/>
+</g>
+<!-- Node6&#45;&gt;Node16 -->
+<g id="edge100" class="edge">
+<title>Node6&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1488.7247,-441.6941C1514.9489,-439.8725 1545.9814,-437.7677 1574,-436 1851.2433,-418.5085 1922.0533,-431.9879 2198,-400 2283.8288,-390.0507 2496.5128,-351.0968 2581,-333 2635.7045,-321.2825 2648.7403,-315.6292 2703,-302 2767.4004,-285.8236 2788.538,-295.5526 2848,-266 2971.7786,-204.4821 2967.0212,-135.6343 3087,-67 3124.0104,-45.8281 3171.1955,-31.1996 3201.9816,-23.0859"/>
+<polygon fill="#191970" stroke="#191970" points="3202.9353,-26.4547 3211.755,-20.584 3201.1993,-19.6734 3202.9353,-26.4547"/>
+</g>
+<!-- Node6&#45;&gt;Node17 -->
+<g id="edge101" class="edge">
+<title>Node6&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M1488.7269,-441.7294C1514.9516,-439.9139 1545.9837,-437.8041 1574,-436 1640.3165,-431.7295 2121.7908,-444.6606 2171,-400 2244.6402,-333.1667 2165.2347,-269.7071 2206,-179 2234.6091,-115.3417 2296.7832,-58.5119 2329.2766,-31.7365"/>
+<polygon fill="#191970" stroke="#191970" points="2331.6625,-34.3081 2337.2315,-25.2949 2327.2573,-28.868 2331.6625,-34.3081"/>
+</g>
+<!-- Node6&#45;&gt;Node20 -->
+<g id="edge102" class="edge">
+<title>Node6&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1378.2903,-436.4205C1343.9299,-428.8557 1297.6331,-416.767 1259,-400 1155.0199,-354.872 1100.3965,-363.1506 1042,-266 1031.6822,-248.835 1031.7101,-225.4583 1033.519,-208.9788"/>
+<polygon fill="#191970" stroke="#191970" points="1037.019,-209.224 1034.9506,-198.833 1030.0876,-208.2459 1037.019,-209.224"/>
+</g>
+<!-- Node6&#45;&gt;Node25 -->
+<g id="edge78" class="edge">
+<title>Node6&#45;&gt;Node25</title>
+<path fill="none" stroke="#191970" d="M1436.0059,-436.0165C1447.7997,-421.3188 1470.55,-393.0019 1490,-369 1527.1903,-323.1059 1554.1853,-321.6485 1574,-266 1584.1476,-237.5011 1566.8458,-228.861 1562,-199 1560.5762,-190.2259 1557.1236,-186.4319 1562,-179 1572.3535,-163.2205 1589.4128,-152.8264 1606.8509,-145.9905"/>
+<polygon fill="#191970" stroke="#191970" points="1608.2874,-149.1945 1616.5375,-142.5472 1605.9428,-142.5988 1608.2874,-149.1945"/>
+</g>
+<!-- Node6&#45;&gt;Node28 -->
+<g id="edge77" class="edge">
+<title>Node6&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M1440.6444,-436.4531C1476.346,-410.0456 1581.8441,-335.7531 1682,-302 1709.9287,-292.5879 1880.6707,-271.6497 1983.8739,-259.5695"/>
+<polygon fill="#191970" stroke="#191970" points="1984.4247,-263.0291 1993.9513,-258.3929 1983.6129,-256.0763 1984.4247,-263.0291"/>
+</g>
+<!-- Node6&#45;&gt;Node29 -->
+<g id="edge42" class="edge">
+<title>Node6&#45;&gt;Node29</title>
+<path fill="none" stroke="#191970" d="M1414.7188,-436.3906C1403.3216,-428.1444 1386.4908,-415.9669 1371.9977,-405.4807"/>
+<polygon fill="#191970" stroke="#191970" points="1373.9468,-402.5709 1363.7933,-399.5446 1369.8435,-408.2421 1373.9468,-402.5709"/>
+</g>
+<!-- Node30 -->
+<g id="node27" class="node">
+<title>Node30</title>
+<g id="a_node27"><a xlink:href="ndarray_8h.html" target="_top" xlink:title="A device&#45;independent managed NDArray abstraction. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="1198.5,-308 1198.5,-327 1323.5,-327 1323.5,-308 1198.5,-308"/>
+<text text-anchor="middle" x="1261" y="-315" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/ndarray.h</text>
+</a>
+</g>
+</g>
+<!-- Node6&#45;&gt;Node30 -->
+<g id="edge79" class="edge">
+<title>Node6&#45;&gt;Node30</title>
+<path fill="none" stroke="#191970" d="M1377.6876,-436.4754C1339.2486,-428.175 1290.6198,-415.1176 1277,-400 1261.6344,-382.9447 1259.1128,-355.6892 1259.4512,-337.2149"/>
+<polygon fill="#191970" stroke="#191970" points="1262.9571,-337.1733 1259.9422,-327.0165 1255.9652,-336.8366 1262.9571,-337.1733"/>
+</g>
+<!-- Node41 -->
+<g id="node34" class="node">
+<title>Node41</title>
+<g id="a_node34"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="2046,-369.5 2046,-399.5 2162,-399.5 2162,-369.5 2046,-369.5"/>
+<text text-anchor="start" x="2054" y="-387.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/packed</text>
+<text text-anchor="middle" x="2104" y="-376.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_func.h</text>
+</a>
+</g>
 </g>
-<!-- Node17&#45;&gt;Node18 -->
+<!-- Node6&#45;&gt;Node41 -->
+<g id="edge81" class="edge">
+<title>Node6&#45;&gt;Node41</title>
+<path fill="none" stroke="#191970" d="M1488.5854,-440.4882C1614.4491,-429.0375 1903.479,-402.7427 2035.7097,-390.7128"/>
+<polygon fill="#191970" stroke="#191970" points="2036.0777,-394.1939 2045.7194,-389.8022 2035.4434,-387.2227 2036.0777,-394.1939"/>
+</g>
+<!-- Node8 -->
+<g id="node9" class="node">
+<title>Node8</title>
+<g id="a_node9"><a xlink:href="functor_8h.html" target="_top" xlink:title="Defines the Functor data structures. ">
+<polygon fill="#ffffff" stroke="#000000" points="1456.5,-241 1456.5,-260 1565.5,-260 1565.5,-241 1456.5,-241"/>
+<text text-anchor="middle" x="1511" y="-248" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/node/functor.h</text>
+</a>
+</g>
+</g>
+<!-- Node7&#45;&gt;Node8 -->
+<g id="edge8" class="edge">
+<title>Node7&#45;&gt;Node8</title>
+<path fill="none" stroke="#191970" d="M1647.6336,-369.389C1618.4414,-343.988 1559.7846,-292.9489 1529.7131,-266.7828"/>
+<polygon fill="#191970" stroke="#191970" points="1531.8791,-264.028 1522.0376,-260.1042 1527.2841,-269.3088 1531.8791,-264.028"/>
+</g>
+<!-- Node7&#45;&gt;Node16 -->
+<g id="edge41" class="edge">
+<title>Node7&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1721.7605,-370.4993C1724.8761,-369.9406 1727.9708,-369.4345 1731,-369 1877.0557,-348.0515 2254.8687,-382.1283 2394,-333 2416.2155,-325.1556 2414.9605,-310.3261 2437,-302 2590.318,-244.0792 2649.4599,-320.5765 2804,-266 2855.1431,-247.9386 2864.478,-233.8963 2906,-199 2967.6203,-147.2125 2960.6306,-107.8277 3030,-67 3084.8779,-34.7014 3159.6122,-22.51 3201.7132,-18.0185"/>
+<polygon fill="#191970" stroke="#191970" points="3202.3223,-21.4756 3211.9312,-17.0124 3201.6363,-14.5092 3202.3223,-21.4756"/>
+</g>
+<!-- Node7&#45;&gt;Node21 -->
 <g id="edge20" class="edge">
-<title>Node17&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2642.1997,-292.8128C2645.1734,-292.1945 2648.1184,-291.5875 2651,-291 2733.9313,-274.0928 2775.1213,-311.6545 2838,-255 2850.9004,-243.3766 2855.603,-223.7402 2857.2559,-208.9005"/>
-<polygon fill="#191970" stroke="#191970" points="2860.7632,-208.9308 2858.0107,-198.6997 2853.7822,-208.4142 2860.7632,-208.9308"/>
+<title>Node7&#45;&gt;Node21</title>
+<path fill="none" stroke="#191970" d="M1691.9303,-369.4639C1708.7188,-360.0903 1730.5306,-347.9121 1748.8183,-337.7015"/>
+<polygon fill="#191970" stroke="#191970" points="1750.7193,-340.6487 1757.7444,-332.7177 1747.3068,-334.5368 1750.7193,-340.6487"/>
+</g>
+<!-- Node7&#45;&gt;Node28 -->
+<g id="edge36" class="edge">
+<title>Node7&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M1669.3783,-369.1737C1675.6846,-350.095 1689.2587,-318.0677 1713,-302 1735.0145,-287.101 1887.3198,-268.8007 1983.6264,-258.5132"/>
+<polygon fill="#191970" stroke="#191970" points="1984.0575,-261.9872 1993.6324,-257.4517 1983.3189,-255.0263 1984.0575,-261.9872"/>
+</g>
+<!-- Node9 -->
+<g id="node10" class="node">
+<title>Node9</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1750.5,-123.5 1750.5,-142.5 1839.5,-142.5 1839.5,-123.5 1750.5,-123.5"/>
+<text text-anchor="middle" x="1795" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">dmlc/logging.h</text>
+</g>
+<!-- Node8&#45;&gt;Node9 -->
+<g id="edge9" class="edge">
+<title>Node8&#45;&gt;Node9</title>
+<path fill="none" stroke="#191970" d="M1534.2409,-240.8845C1584.7089,-220.0042 1704.4386,-170.4682 1762.4189,-146.4799"/>
+<polygon fill="#191970" stroke="#191970" points="1763.934,-149.6408 1771.8363,-142.5836 1761.2578,-143.1726 1763.934,-149.6408"/>
+</g>
+<!-- Node8&#45;&gt;Node10 -->
+<g id="edge10" class="edge">
+<title>Node8&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1496.1039,-240.9455C1469.379,-223.4117 1412.2336,-184.1078 1371,-143 1356.1687,-128.214 1341.8179,-109.0699 1332.1383,-95.211"/>
+<polygon fill="#191970" stroke="#191970" points="1334.769,-92.8576 1326.2348,-86.5793 1328.9911,-96.8093 1334.769,-92.8576"/>
+</g>
+<!-- Node8&#45;&gt;Node17 -->
+<g id="edge17" class="edge">
+<title>Node8&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M1565.7773,-246.447C1640.9695,-240.1437 1780.0393,-225.8292 1896,-199 1972.68,-181.259 1991.8985,-174.5585 2064,-143 2128.1442,-114.9244 2138.0414,-95.4957 2202,-67 2236.8299,-51.4821 2277.8836,-37.5192 2307.9063,-28.06"/>
+<polygon fill="#191970" stroke="#191970" points="2309.1329,-31.3439 2317.6402,-25.029 2307.0517,-24.6604 2309.1329,-31.3439"/>
+</g>
+<!-- Node8&#45;&gt;Node18 -->
+<g id="edge18" class="edge">
+<title>Node8&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1456.4328,-246.3434C1384.6935,-238.1497 1258.5829,-213.9688 1180,-143 1171.2675,-135.1136 1140.9887,-67.8724 1126.2237,-34.3464"/>
+<polygon fill="#191970" stroke="#191970" points="1129.4022,-32.8795 1122.1805,-25.1272 1122.9916,-35.6909 1129.4022,-32.8795"/>
+</g>
+<!-- Node8&#45;&gt;Node20 -->
+<g id="edge19" class="edge">
+<title>Node8&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1456.1631,-243.3851C1357.2766,-230.5549 1151.8478,-203.9011 1070.8679,-193.3943"/>
+<polygon fill="#191970" stroke="#191970" points="1071.1012,-189.8953 1060.7339,-192.0794 1070.2004,-196.8371 1071.1012,-189.8953"/>
+</g>
+<!-- Node10&#45;&gt;Node11 -->
+<g id="edge11" class="edge">
+<title>Node10&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M1260.2377,-73.3825C1084.8302,-62.7648 571.6714,-31.7026 378.6053,-20.016"/>
+<polygon fill="#191970" stroke="#191970" points="378.7463,-16.5182 368.5531,-19.4075 378.3233,-23.5054 378.7463,-16.5182"/>
+</g>
+<!-- Node15 -->
+<g id="node13" class="node">
+<title>Node15</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1919.5,-6 1919.5,-25 2044.5,-25 2044.5,-6 1919.5,-6"/>
+<text text-anchor="middle" x="1982" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/logging.h</text>
+</g>
+<!-- Node10&#45;&gt;Node15 -->
+<g id="edge12" class="edge">
+<title>Node10&#45;&gt;Node15</title>
+<path fill="none" stroke="#191970" d="M1379.6229,-71.461C1501.1996,-60.1665 1777.3626,-34.5109 1909.2518,-22.2583"/>
+<polygon fill="#191970" stroke="#191970" points="1909.6274,-25.7386 1919.2607,-21.3285 1908.9798,-18.7686 1909.6274,-25.7386"/>
+</g>
+<!-- Node10&#45;&gt;Node16 -->
+<g id="edge13" class="edge">
+<title>Node10&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1379.6458,-75.0835C1675.5205,-65.5765 2980.2909,-23.6521 3201.9429,-16.53"/>
+<polygon fill="#191970" stroke="#191970" points="3202.0699,-20.0279 3211.9523,-16.2084 3201.845,-13.0315 3202.0699,-20.0279"/>
+</g>
+<!-- Node10&#45;&gt;Node17 -->
+<g id="edge14" class="edge">
+<title>Node10&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M1379.8292,-73.4277C1567.2053,-62.2397 2140.0278,-28.0372 2305.066,-18.183"/>
+<polygon fill="#191970" stroke="#191970" points="2305.5651,-21.6595 2315.3386,-17.5696 2305.1478,-14.6719 2305.5651,-21.6595"/>
+</g>
+<!-- Node10&#45;&gt;Node18 -->
+<g id="edge15" class="edge">
+<title>Node10&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1288.4375,-67.3906C1250.9098,-55.9651 1188.5932,-36.9925 1150.6108,-25.4285"/>
+<polygon fill="#191970" stroke="#191970" points="1151.1699,-21.9402 1140.584,-22.3758 1149.131,-28.6367 1151.1699,-21.9402"/>
 </g>
 <!-- Node19 -->
-<g id="node20" class="node">
+<g id="node17" class="node">
 <title>Node19</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1766,-235.5 1766,-254.5 1824,-254.5 1824,-235.5 1766,-235.5"/>
-<text text-anchor="middle" x="1795" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">memory</text>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1251,-6 1251,-25 1301,-25 1301,-6 1251,-6"/>
+<text text-anchor="middle" x="1276" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">atomic</text>
 </g>
-<!-- Node17&#45;&gt;Node19 -->
+<!-- Node10&#45;&gt;Node19 -->
+<g id="edge16" class="edge">
+<title>Node10&#45;&gt;Node19</title>
+<path fill="none" stroke="#191970" d="M1313.125,-67.3906C1306.6407,-58.3273 1296.7586,-44.5149 1288.8199,-33.4188"/>
+<polygon fill="#191970" stroke="#191970" points="1291.6371,-31.3412 1282.9719,-25.2449 1285.9441,-35.4143 1291.6371,-31.3412"/>
+</g>
+<!-- Node21&#45;&gt;Node18 -->
+<g id="edge23" class="edge">
+<title>Node21&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1746.1038,-302.4961C1657.025,-267.7508 1431.9657,-177.7772 1251,-87 1213.4347,-68.1563 1170.7615,-44.8743 1144.109,-30.1024"/>
+<polygon fill="#191970" stroke="#191970" points="1145.4947,-26.8682 1135.0537,-25.0691 1142.0938,-32.9866 1145.4947,-26.8682"/>
+</g>
+<!-- Node21&#45;&gt;Node20 -->
+<g id="edge24" class="edge">
+<title>Node21&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1721.8536,-316.6104C1583.6661,-313.9745 1258.6119,-303.8247 1156,-266 1133.8943,-257.8514 1132.6491,-247.999 1113,-235 1096.4891,-224.0771 1077.4286,-212.5431 1062.5262,-203.7569"/>
+<polygon fill="#191970" stroke="#191970" points="1064.2424,-200.7059 1053.8447,-198.6714 1060.7042,-206.7459 1064.2424,-200.7059"/>
+</g>
+<!-- Node21&#45;&gt;Node22 -->
 <g id="edge21" class="edge">
-<title>Node17&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M2515.9823,-296.8272C2500.6357,-294.691 2484.2493,-292.5927 2469,-291 2228.5001,-265.8815 1938.225,-251.3863 1834.3329,-246.695"/>
-<polygon fill="#191970" stroke="#191970" points="1834.4443,-243.1966 1824.2979,-246.2463 1834.1315,-250.1896 1834.4443,-243.1966"/>
+<title>Node21&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M1848.1396,-316.9227C2021.2775,-314.8372 2500.7737,-305.553 2653,-266 2669.594,-261.6884 2782.662,-213.3789 2792,-199 2800.7693,-185.4967 2800.5182,-166.9437 2798.505,-152.9466"/>
+<polygon fill="#191970" stroke="#191970" points="2801.9047,-152.0821 2796.6513,-142.8815 2795.0205,-153.35 2801.9047,-152.0821"/>
 </g>
-<!-- Node20 -->
+<!-- Node23 -->
 <g id="node21" class="node">
-<title>Node20</title>
-<g id="a_node21"><a xlink:href="runtime_2container_2base_8h.html" target="_top" xlink:title="Base utilities for common POD(plain old data) container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="2626.5,-235.5 2626.5,-254.5 2681.5,-254.5 2681.5,-235.5 2626.5,-235.5"/>
-<text text-anchor="middle" x="2654" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">./base.h</text>
-</a>
+<title>Node23</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2058,-179.5 2058,-198.5 2116,-198.5 2116,-179.5 2058,-179.5"/>
+<text text-anchor="middle" x="2087" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">memory</text>
 </g>
+<!-- Node21&#45;&gt;Node23 -->
+<g id="edge22" class="edge">
+<title>Node21&#45;&gt;Node23</title>
+<path fill="none" stroke="#191970" d="M1848.3867,-312.7883C1945.5575,-304.9144 2122.1465,-287.7772 2141,-266 2158.2654,-246.0572 2132.2337,-220.6196 2110.6761,-204.5648"/>
+<polygon fill="#191970" stroke="#191970" points="2112.6161,-201.6495 2102.4395,-198.702 2108.5569,-207.3524 2112.6161,-201.6495"/>
+</g>
+<!-- Node24 -->
+<g id="node22" class="node">
+<title>Node24</title>
+<g id="a_node22"><a xlink:href="runtime_2container_2base_8h.html" target="_top" xlink:title="Base utilities for common POD(plain old data) container types. ">
+<polygon fill="#ffffff" stroke="#000000" points="1832.5,-179.5 1832.5,-198.5 1887.5,-198.5 1887.5,-179.5 1832.5,-179.5"/>
+<text text-anchor="middle" x="1860" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">./base.h</text>
+</a>
 </g>
-<!-- Node17&#45;&gt;Node20 -->
-<g id="edge24" class="edge">
-<title>Node17&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M2597.5393,-291.2977C2608.6008,-282.2274 2622.6491,-270.7077 2633.9653,-261.4285"/>
-<polygon fill="#191970" stroke="#191970" points="2636.4781,-263.8942 2641.9916,-254.8469 2632.0396,-258.4813 2636.4781,-263.8942"/>
 </g>
-<!-- Node20&#45;&gt;Node5 -->
+<!-- Node21&#45;&gt;Node24 -->
 <g id="edge25" class="edge">
-<title>Node20&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M2626.149,-240.6163C2577.4502,-232.9098 2473.4137,-216.2633 2374.9039,-199.1308"/>
-<polygon fill="#191970" stroke="#191970" points="2375.2292,-195.6347 2364.7765,-197.3642 2374.0263,-202.5306 2375.2292,-195.6347"/>
+<title>Node21&#45;&gt;Node24</title>
+<path fill="none" stroke="#191970" d="M1848.048,-306.5531C1871.2182,-299.1459 1895.1526,-286.7454 1909,-266 1916.6491,-254.5406 1914.4728,-247.6442 1909,-235 1903.7663,-222.9081 1893.6158,-212.5335 1883.9045,-204.7139"/>
+<polygon fill="#191970" stroke="#191970" points="1885.7234,-201.7038 1875.618,-198.5209 1881.5329,-207.3109 1885.7234,-201.7038"/>
 </g>
-<!-- Node20&#45;&gt;Node6 -->
+<!-- Node24&#45;&gt;Node9 -->
+<g id="edge26" class="edge">
+<title>Node24&#45;&gt;Node9</title>
+<path fill="none" stroke="#191970" d="M1848.6779,-179.2455C1839.0777,-170.9746 1825.1108,-158.9416 1813.8305,-149.2232"/>
+<polygon fill="#191970" stroke="#191970" points="1816.053,-146.5182 1806.1924,-142.6427 1811.484,-151.8214 1816.053,-146.5182"/>
+</g>
+<!-- Node24&#45;&gt;Node10 -->
 <g id="edge32" class="edge">
-<title>Node20&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M2634.4553,-235.4977C2602.161,-220.3283 2535.2335,-191.1182 2475,-179 2379.6735,-159.8216 1714.57,-140.6583 1495.6687,-134.807"/>
-<polygon fill="#191970" stroke="#191970" points="1495.6759,-131.306 1485.5862,-134.5384 1495.4894,-138.3035 1495.6759,-131.306"/>
+<title>Node24&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1832.3064,-184.873C1783.8152,-177.4993 1680.4627,-161.1268 1594,-143 1557.7078,-135.3914 1549.0357,-131.7431 1513,-123 1464.6028,-111.2577 1409.3124,-98.1065 1370.3517,-88.8832"/>
+<polygon fill="#191970" stroke="#191970" points="1370.8518,-85.4049 1360.3146,-86.5084 1369.2401,-92.2169 1370.8518,-85.4049"/>
 </g>
-<!-- Node20&#45;&gt;Node11 -->
-<g id="edge26" class="edge">
-<title>Node20&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M2645.1698,-235.4248C2619.3329,-207.4079 2543.6057,-125.2912 2510.1769,-89.0418"/>
-<polygon fill="#191970" stroke="#191970" points="2512.3524,-86.2381 2503.0001,-81.2595 2507.2065,-90.9836 2512.3524,-86.2381"/>
+<!-- Node24&#45;&gt;Node15 -->
+<g id="edge27" class="edge">
+<title>Node24&#45;&gt;Node15</title>
+<path fill="none" stroke="#191970" d="M1866.733,-179.4248C1886.3505,-151.5261 1943.689,-69.9832 1969.3407,-33.5031"/>
+<polygon fill="#191970" stroke="#191970" points="1972.2484,-35.4529 1975.1374,-25.2595 1966.5223,-31.4264 1972.2484,-35.4529"/>
 </g>
-<!-- Node20&#45;&gt;Node14 -->
+<!-- Node24&#45;&gt;Node18 -->
 <g id="edge35" class="edge">
-<title>Node20&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2655.1936,-235.1649C2658.615,-211.5867 2670.7355,-151.6749 2708,-123 2753.2818,-88.1558 2926.5942,-76.2956 2999.4435,-72.7953"/>
-<polygon fill="#191970" stroke="#191970" points="2999.6062,-76.2916 3009.4356,-72.3377 2999.2859,-69.2989 2999.6062,-76.2916"/>
+<title>Node24&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1861.2551,-179.0382C1862.466,-164.6906 1862.372,-138.261 1848,-123 1775.977,-46.522 1272.4836,-34.36 1242,-31 1211.0546,-27.5892 1175.8395,-23.1319 1150.8649,-19.8731"/>
+<polygon fill="#191970" stroke="#191970" points="1151.2012,-16.3873 1140.8308,-18.5563 1150.2904,-23.3278 1151.2012,-16.3873"/>
 </g>
-<!-- Node20&#45;&gt;Node18 -->
+<!-- Node24&#45;&gt;Node22 -->
 <g id="edge33" class="edge">
-<title>Node20&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2681.5123,-237.4476C2716.102,-227.9524 2775.82,-211.5592 2816.1004,-200.5018"/>
-<polygon fill="#191970" stroke="#191970" points="2817.1786,-203.8354 2825.8953,-197.813 2815.3255,-197.0851 2817.1786,-203.8354"/>
+<title>Node24&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M1887.6994,-187.3392C2020.2905,-179.3894 2590.3524,-145.2101 2751.4488,-135.5513"/>
+<polygon fill="#191970" stroke="#191970" points="2752.1228,-139.0172 2761.8953,-134.9249 2751.7037,-132.0298 2752.1228,-139.0172"/>
 </g>
-<!-- Node20&#45;&gt;Node21 -->
-<g id="edge27" class="edge">
-<title>Node20&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M2626.2725,-240.8264C2612.149,-238.8218 2594.6889,-236.529 2579,-235 2334.8389,-211.2047 2044.3308,-197.4271 1909.9874,-191.8785"/>
-<polygon fill="#191970" stroke="#191970" points="1909.9441,-188.3739 1899.8092,-191.4616 1909.6575,-195.3681 1909.9441,-188.3739"/>
+<!-- Node24&#45;&gt;Node25 -->
+<g id="edge28" class="edge">
+<title>Node24&#45;&gt;Node25</title>
+<path fill="none" stroke="#191970" d="M1832.4911,-180.9766C1800.645,-171.6881 1747.817,-156.28 1710.6383,-145.4362"/>
+<polygon fill="#191970" stroke="#191970" points="1711.1774,-141.9476 1700.5974,-142.5076 1709.2173,-148.6676 1711.1774,-141.9476"/>
 </g>
-<!-- Node23 -->
+<!-- Node27 -->
 <g id="node24" class="node">
-<title>Node23</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2382.5,-179.5 2382.5,-198.5 2465.5,-198.5 2465.5,-179.5 2382.5,-179.5"/>
-<text text-anchor="middle" x="2424" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">initializer_list</text>
+<title>Node27</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="1972.5,-123.5 1972.5,-142.5 2055.5,-142.5 2055.5,-123.5 1972.5,-123.5"/>
+<text text-anchor="middle" x="2014" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">initializer_list</text>
 </g>
-<!-- Node20&#45;&gt;Node23 -->
+<!-- Node24&#45;&gt;Node27 -->
 <g id="edge34" class="edge">
-<title>Node20&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M2626.2566,-238.2451C2588.4109,-229.0305 2519.9794,-212.3689 2473.2924,-201.0016"/>
-<polygon fill="#191970" stroke="#191970" points="2473.9314,-197.555 2463.3872,-198.5899 2472.2754,-204.3563 2473.9314,-197.555"/>
+<title>Node24&#45;&gt;Node27</title>
+<path fill="none" stroke="#191970" d="M1886.4735,-179.3733C1911.8597,-170.1419 1950.3645,-156.1402 1978.422,-145.9375"/>
+<polygon fill="#191970" stroke="#191970" points="1979.6701,-149.2079 1987.8719,-142.5011 1977.2778,-142.6293 1979.6701,-149.2079"/>
 </g>
-<!-- Node21&#45;&gt;Node6 -->
-<g id="edge28" class="edge">
-<title>Node21&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M1770.1927,-180.1266C1695.2824,-169.87 1572.4687,-153.0544 1495.5882,-142.528"/>
-<polygon fill="#191970" stroke="#191970" points="1495.9015,-139.0383 1485.5191,-141.1493 1494.9519,-145.9736 1495.9015,-139.0383"/>
+<!-- Node25&#45;&gt;Node10 -->
+<g id="edge29" class="edge">
+<title>Node25&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1608.5721,-123.4369C1547.7634,-113.6516 1453.3476,-98.4582 1389.2072,-88.1368"/>
+<polygon fill="#191970" stroke="#191970" points="1389.6119,-84.657 1379.1828,-86.5237 1388.4997,-91.5681 1389.6119,-84.657"/>
 </g>
-<!-- Node21&#45;&gt;Node13 -->
+<!-- Node25&#45;&gt;Node17 -->
 <g id="edge30" class="edge">
-<title>Node21&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1803.4617,-179.4628C1753.5664,-164.7847 1653.1368,-136.9018 1566,-123 1329.6496,-85.2926 1042.4014,-75.0097 933.1251,-72.3633"/>
-<polygon fill="#191970" stroke="#191970" points="932.8877,-68.857 922.809,-72.1236 932.725,-75.8551 932.8877,-68.857"/>
+<title>Node25&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M1723.2332,-123.484C1854.9588,-100.7893 2185.5002,-43.8412 2305.5471,-23.1587"/>
+<polygon fill="#191970" stroke="#191970" points="2306.2366,-26.5915 2315.4971,-21.4444 2305.048,-19.6932 2306.2366,-26.5915"/>
 </g>
-<!-- Node21&#45;&gt;Node14 -->
+<!-- Node25&#45;&gt;Node18 -->
 <g id="edge31" class="edge">
-<title>Node21&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M1899.7803,-182.641C2120.0775,-161.0162 2835.6465,-90.7745 2998.7142,-74.7674"/>
-<polygon fill="#191970" stroke="#191970" points="2999.4566,-78.2114 3009.0669,-73.7512 2998.7727,-71.2449 2999.4566,-78.2114"/>
+<title>Node25&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1659.9565,-123.3111C1646.851,-108.2934 1619.2924,-79.796 1589,-67 1446.1702,-6.6665 1396.0459,-48.6168 1242,-31 1211.0688,-27.4627 1175.852,-23.0207 1150.8731,-19.7998"/>
+<polygon fill="#191970" stroke="#191970" points="1151.2037,-16.3135 1140.8371,-18.5004 1150.3048,-23.2555 1151.2037,-16.3135"/>
 </g>
-<!-- Node22 -->
-<g id="node23" class="node">
-<title>Node22</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="1810.5,-123.5 1810.5,-142.5 1859.5,-142.5 1859.5,-123.5 1810.5,-123.5"/>
-<text text-anchor="middle" x="1835" y="-130.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstdlib</text>
-</g>
-<!-- Node21&#45;&gt;Node22 -->
-<g id="edge29" class="edge">
-<title>Node21&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1835,-179.2455C1835,-171.9382 1835,-161.6944 1835,-152.7046"/>
-<polygon fill="#191970" stroke="#191970" points="1838.5001,-152.6426 1835,-142.6427 1831.5001,-152.6427 1838.5001,-152.6426"/>
-</g>
-<!-- Node24&#45;&gt;Node7 -->
+<!-- Node28&#45;&gt;Node11 -->
 <g id="edge37" class="edge">
-<title>Node24&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M1837.2655,-296.9288C1852.708,-278.7373 1886.416,-237.537 1909,-199 1929.022,-164.8347 1946.6965,-122.3691 1956.8696,-96.0993"/>
-<polygon fill="#191970" stroke="#191970" points="1960.2178,-97.142 1960.5123,-86.5513 1953.6776,-94.6468 1960.2178,-97.142"/>
+<title>Node28&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M1993.7745,-241.2516C1714.0131,-203.8758 668.1338,-64.1478 378.668,-25.4755"/>
+<polygon fill="#191970" stroke="#191970" points="379.0679,-21.9979 368.6924,-24.1428 378.1409,-28.9363 379.0679,-21.9979"/>
 </g>
-<!-- Node24&#45;&gt;Node11 -->
+<!-- Node28&#45;&gt;Node15 -->
 <g id="edge38" class="edge">
-<title>Node24&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M1854.5037,-296.9947C1891.7016,-283.1661 1963.0482,-256.7745 2024,-235 2184.528,-177.6527 2376.8067,-111.5787 2456.531,-84.2973"/>
-<polygon fill="#191970" stroke="#191970" points="2457.8024,-87.5616 2466.1314,-81.0134 2455.5368,-80.9383 2457.8024,-87.5616"/>
+<title>Node28&#45;&gt;Node15</title>
+<path fill="none" stroke="#191970" d="M2048.9756,-240.9942C2031.2107,-227.6947 2004.2413,-202.5174 2016,-179 2027.9257,-155.1486 2052.0743,-166.8514 2064,-143 2067.9752,-135.0495 2066.8452,-131.4212 2064,-123 2051.4639,-85.8954 2020.4356,-51.4921 2000.3456,-32.0308"/>
+<polygon fill="#191970" stroke="#191970" points="2002.6697,-29.4116 1992.9922,-25.0992 1997.8681,-34.5053 2002.6697,-29.4116"/>
 </g>
-<!-- Node24&#45;&gt;Node12 -->
+<!-- Node28&#45;&gt;Node16 -->
 <g id="edge39" class="edge">
-<title>Node24&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M1777.8976,-296.98C1624.7066,-268.5942 1155.7332,-182.805 765,-123 634.6074,-103.0423 478.698,-83.5156 412.3969,-75.4137"/>
-<polygon fill="#191970" stroke="#191970" points="412.5268,-71.9037 402.1769,-74.1687 411.6803,-78.8523 412.5268,-71.9037"/>
+<title>Node28&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2095.1019,-240.9983C2197.3621,-211.0622 2525.026,-117.6926 2802,-67 2950.0519,-39.9031 3129.3684,-23.7865 3201.571,-17.9742"/>
+<polygon fill="#191970" stroke="#191970" points="3202.1332,-21.4407 3211.825,-17.1605 3201.5794,-14.4626 3202.1332,-21.4407"/>
 </g>
-<!-- Node24&#45;&gt;Node13 -->
+<!-- Node28&#45;&gt;Node17 -->
 <g id="edge40" class="edge">
-<title>Node24&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M1790.9462,-296.9967C1641.3929,-259.6481 1094.3044,-123.0213 933.0308,-82.7457"/>
-<polygon fill="#191970" stroke="#191970" points="933.5244,-79.2616 922.9743,-80.2343 931.8283,-86.053 933.5244,-79.2616"/>
+<title>Node28&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M2056.6967,-240.6416C2048.0726,-225.7721 2035.1563,-197.3934 2049,-179 2083.7664,-132.8078 2127.7645,-177.7088 2174,-143 2205.9498,-119.0154 2191.3159,-92.5839 2222,-67 2246.1438,-46.8693 2279.1714,-33.8441 2305.6296,-25.9296"/>
+<polygon fill="#191970" stroke="#191970" points="2306.7573,-29.2477 2315.4161,-23.1423 2304.8398,-22.5154 2306.7573,-29.2477"/>
 </g>
-<!-- Node25&#45;&gt;Node4 -->
+<!-- Node29&#45;&gt;Node8 -->
 <g id="edge43" class="edge">
-<title>Node25&#45;&gt;Node4</title>
-<path fill="none" stroke="#191970" d="M1120.4857,-425.21C1133.8833,-406.7684 1158.7487,-376.0079 1187,-358 1216.7462,-339.0392 1254.1197,-326.4425 1285.0493,-318.4558"/>
-<polygon fill="#191970" stroke="#191970" points="1285.9855,-321.8297 1294.8471,-316.0225 1284.2983,-315.0361 1285.9855,-321.8297"/>
+<title>Node29&#45;&gt;Node8</title>
+<path fill="none" stroke="#191970" d="M1361.9452,-369.389C1393.9273,-343.8794 1458.328,-292.5122 1491.0046,-266.4487"/>
+<polygon fill="#191970" stroke="#191970" points="1493.3236,-269.076 1498.959,-260.1042 1488.9587,-263.6036 1493.3236,-269.076"/>
 </g>
-<!-- Node25&#45;&gt;Node12 -->
-<g id="edge80" class="edge">
-<title>Node25&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M1071.9758,-425.3848C997.4737,-395.4036 827.5561,-325.2183 690,-255 646.0052,-232.542 636.5511,-224.0859 594,-199 525.3654,-158.5367 444.6868,-110.2805 404.8447,-86.4034"/>
-<polygon fill="#191970" stroke="#191970" points="406.4536,-83.2872 396.0771,-81.1475 402.8544,-89.2911 406.4536,-83.2872"/>
+<!-- Node29&#45;&gt;Node16 -->
+<g id="edge75" class="edge">
+<title>Node29&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M1399.825,-380.3173C1649.8174,-361.8191 2642.1765,-287.3344 2705,-266 2853.7091,-215.4995 2853.2379,-130.2229 2997,-67 3066.192,-36.5711 3154.8848,-23.4937 3201.705,-18.3956"/>
+<polygon fill="#191970" stroke="#191970" points="3202.2684,-21.8561 3211.855,-17.3451 3201.5477,-14.8933 3202.2684,-21.8561"/>
 </g>
-<!-- Node25&#45;&gt;Node24 -->
+<!-- Node29&#45;&gt;Node28 -->
 <g id="edge44" class="edge">
-<title>Node25&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1166.7735,-436.9175C1280.1247,-429.4013 1530.7353,-410.8607 1614,-389 1647.3036,-380.2563 1652.993,-370.6938 1685,-358 1720.5449,-343.9031 1761.6712,-329.368 1790.8493,-319.3493"/>
-<polygon fill="#191970" stroke="#191970" points="1792.3123,-322.5482 1800.643,-316.0021 1790.0484,-315.9243 1792.3123,-322.5482"/>
+<title>Node29&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M1375.819,-369.3793C1418.6122,-350.3639 1496.246,-318.2439 1566,-302 1571.0388,-300.8266 1844.7854,-272.7803 1983.7939,-258.5809"/>
+<polygon fill="#191970" stroke="#191970" points="1984.378,-262.0396 1993.9706,-257.5416 1983.6667,-255.0758 1984.378,-262.0396"/>
 </g>
-<!-- Node25&#45;&gt;Node26 -->
+<!-- Node29&#45;&gt;Node30 -->
 <g id="edge45" class="edge">
-<title>Node25&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1166.6696,-437.8934C1311.4862,-431.1141 1706.9189,-411.8584 2036,-389 2073.2049,-386.4157 2114.5518,-383.0222 2148.7963,-380.0694"/>
-<polygon fill="#191970" stroke="#191970" points="2149.5232,-383.5196 2159.1834,-379.1685 2148.9183,-376.5457 2149.5232,-383.5196"/>
-</g>
-<!-- Node36 -->
-<g id="node35" class="node">
-<title>Node36</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="978.5,-297 978.5,-316 1043.5,-316 1043.5,-297 978.5,-297"/>
-<text text-anchor="middle" x="1011" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">functional</text>
-</g>
-<!-- Node25&#45;&gt;Node36 -->
-<g id="edge79" class="edge">
-<title>Node25&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M1098.8359,-425.389C1080.4705,-400.5307 1043.9643,-351.1184 1024.2939,-324.4937"/>
-<polygon fill="#191970" stroke="#191970" points="1026.8529,-322.0674 1018.0956,-316.1042 1021.2228,-326.227 1026.8529,-322.0674"/>
+<title>Node29&#45;&gt;Node30</title>
+<path fill="none" stroke="#191970" d="M1324.3929,-369.2967C1311.3689,-358.6551 1293.9961,-344.4602 1280.731,-333.6217"/>
+<polygon fill="#191970" stroke="#191970" points="1282.6525,-330.6719 1272.6941,-327.055 1278.2234,-336.0926 1282.6525,-330.6719"/>
 </g>
-<!-- Node26&#45;&gt;Node6 -->
+<!-- Node30&#45;&gt;Node10 -->
 <g id="edge71" class="edge">
-<title>Node26&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M2176.5568,-363.9555C2110.9853,-350.1174 1996.6865,-325.722 1988,-322 1965.339,-312.2901 1964.778,-300.4322 1942,-291 1864.6084,-258.9526 1838.7223,-273.3835 1757,-255 1658.9843,-232.9512 1630.8666,-237.3279 1538,-199 1504.8116,-185.3025 1469.6213,-163.1424 1447.7329,-148.3266"/>
-<polygon fill="#191970" stroke="#191970" points="1449.7083,-145.4373 1439.4833,-142.6623 1445.746,-151.208 1449.7083,-145.4373"/>
+<title>Node30&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M1304.2611,-307.9422C1326.8197,-300.5254 1352.4672,-287.7104 1366,-266 1396.4158,-217.2045 1305.9366,-175.0722 1295,-143 1289.4092,-126.6046 1297.6178,-108.1894 1306.0895,-94.9854"/>
+<polygon fill="#191970" stroke="#191970" points="1308.9738,-96.9682 1311.8302,-86.7657 1303.2349,-92.9601 1308.9738,-96.9682"/>
 </g>
-<!-- Node26&#45;&gt;Node7 -->
+<!-- Node30&#45;&gt;Node11 -->
 <g id="edge46" class="edge">
-<title>Node26&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M2186.4248,-363.996C2138.4794,-349.2066 2054.2732,-315.8543 2012,-255 1977.9768,-206.022 1969.076,-134.2071 1966.7762,-96.5908"/>
-<polygon fill="#191970" stroke="#191970" points="1970.267,-96.3178 1966.2704,-86.5057 1963.2758,-96.6686 1970.267,-96.3178"/>
+<title>Node30&#45;&gt;Node11</title>
+<path fill="none" stroke="#191970" d="M1198.4185,-312.4165C1044.4761,-299.8835 659.3589,-268.3466 653,-266 512.2393,-214.0552 377.1886,-89.0586 325.7979,-37.9005"/>
+<polygon fill="#191970" stroke="#191970" points="328.0911,-35.2431 318.5554,-30.6256 323.1302,-40.1818 328.0911,-35.2431"/>
 </g>
-<!-- Node26&#45;&gt;Node14 -->
-<g id="edge77" class="edge">
-<title>Node26&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2262.955,-363.986C2271.8612,-361.9671 2281.2391,-359.8792 2290,-358 2369.3439,-340.9813 2395.6286,-356.6658 2469,-322 2488.707,-312.689 2487.176,-300.0594 2507,-291 2591.5342,-252.3686 2629.6031,-296.029 2713,-255 2757.88,-232.9202 2758.006,-211.5297 2796,-179 2826.2644,-153.0882 2830.8049,-141.6725 2866,-123 2909.4816,-99.9311 2964.9511,-85.3508 2999.476,-77.8079"/>
-<polygon fill="#191970" stroke="#191970" points="3000.4226,-81.1852 3009.4817,-75.6913 2998.9738,-74.3367 3000.4226,-81.1852"/>
+<!-- Node30&#45;&gt;Node18 -->
+<g id="edge73" class="edge">
+<title>Node30&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1198.3945,-312.2305C1051.2445,-299.7958 695.3132,-269.4116 690,-266 656.8331,-244.7036 646,-228.4155 646,-189 646,-189 646,-189 646,-133 646,-42.3779 977.7711,-21.1186 1085.0033,-16.614"/>
+<polygon fill="#191970" stroke="#191970" points="1085.3815,-20.1019 1095.2345,-16.2071 1085.1032,-13.1074 1085.3815,-20.1019"/>
 </g>
-<!-- Node26&#45;&gt;Node15 -->
-<g id="edge75" class="edge">
-<title>Node26&#45;&gt;Node15</title>
-<path fill="none" stroke="#191970" d="M2180.3055,-363.986C2143.1471,-354.9568 2087.596,-340.0957 2041,-322 2012.2816,-310.8471 2006.9736,-303.9079 1979,-291 1883.8836,-247.1103 1855.5319,-246.1722 1762,-199 1689.4701,-162.42 1607.95,-111.4497 1569.0545,-86.485"/>
-<polygon fill="#191970" stroke="#191970" points="1570.85,-83.4782 1560.5484,-81.003 1567.058,-89.3622 1570.85,-83.4782"/>
+<!-- Node30&#45;&gt;Node19 -->
+<g id="edge72" class="edge">
+<title>Node30&#45;&gt;Node19</title>
+<path fill="none" stroke="#191970" d="M1198.3503,-312.2659C1053.4498,-300.0886 707.1458,-270.5461 703,-266 622.4306,-177.6521 1104.6871,-55.8918 1240.9059,-23.6174"/>
+<polygon fill="#191970" stroke="#191970" points="1241.9005,-26.979 1250.8316,-21.2794 1240.2955,-20.1655 1241.9005,-26.979"/>
 </g>
-<!-- Node26&#45;&gt;Node16 -->
-<g id="edge78" class="edge">
-<title>Node26&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2175.1966,-363.9533C2165.2352,-361.9566 2154.7742,-359.8859 2145,-358 2058.4648,-341.3035 2031.0502,-356.6117 1950,-322 1928.3332,-312.7474 1929.16,-298.9997 1907,-291 1846.5982,-269.195 1395.6749,-250.9871 1267.7455,-246.2187"/>
-<polygon fill="#191970" stroke="#191970" points="1267.8605,-242.7206 1257.738,-245.8486 1267.6018,-249.7159 1267.8605,-242.7206"/>
+<!-- Node30&#45;&gt;Node20 -->
+<g id="edge74" class="edge">
+<title>Node30&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1221.0932,-307.9249C1191.2546,-299.659 1150.2243,-285.885 1118,-266 1091.465,-249.6258 1066.3252,-223.4662 1051.2854,-206.2593"/>
+<polygon fill="#191970" stroke="#191970" points="1053.9091,-203.942 1044.7518,-198.6135 1048.5875,-208.4896 1053.9091,-203.942"/>
 </g>
-<!-- Node26&#45;&gt;Node24 -->
+<!-- Node30&#45;&gt;Node28 -->
 <g id="edge70" class="edge">
-<title>Node26&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M2176.4631,-363.978C2166.135,-361.9174 2155.2035,-359.8143 2145,-358 2041.8338,-339.6562 2015.2923,-339.62 1912,-322 1904.3798,-320.7001 1896.3771,-319.2781 1888.4728,-317.8393"/>
-<polygon fill="#191970" stroke="#191970" points="1889.0174,-314.3809 1878.5495,-316.0159 1887.7522,-321.2656 1889.0174,-314.3809"/>
+<title>Node30&#45;&gt;Node28</title>
+<path fill="none" stroke="#191970" d="M1323.7952,-312.3665C1361.0551,-309.3299 1409.2199,-305.4208 1452,-302 1655.0921,-285.7601 1706.0744,-284.2027 1909,-266 1933.2379,-263.8258 1959.5995,-261.2502 1983.549,-258.8292"/>
+<polygon fill="#191970" stroke="#191970" points="1984.0571,-262.2956 1993.6521,-257.8027 1983.3495,-255.3315 1984.0571,-262.2956"/>
 </g>
-<!-- Node27 -->
+<!-- Node31 -->
 <g id="node28" class="node">
-<title>Node27</title>
+<title>Node31</title>
 <g id="a_node28"><a xlink:href="optional_8h.html" target="_top" xlink:title="Runtime Optional container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="2842,-291.5 2842,-321.5 2968,-321.5 2968,-291.5 2842,-291.5"/>
-<text text-anchor="start" x="2850" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="2905" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/optional.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1774,-235.5 1774,-265.5 1900,-265.5 1900,-235.5 1774,-235.5"/>
+<text text-anchor="start" x="1782" y="-253.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
+<text text-anchor="middle" x="1837" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/optional.h</text>
 </a>
 </g>
 </g>
-<!-- Node26&#45;&gt;Node27 -->
+<!-- Node30&#45;&gt;Node31 -->
 <g id="edge47" class="edge">
-<title>Node26&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M2284.8493,-368.8922C2390.4124,-360.9197 2609.9499,-343.3399 2795,-322 2806.8995,-320.6278 2819.4998,-319.009 2831.7604,-317.3441"/>
-<polygon fill="#191970" stroke="#191970" points="2832.528,-320.7715 2841.9567,-315.9392 2831.5725,-313.837 2832.528,-320.7715"/>
+<title>Node30&#45;&gt;Node31</title>
+<path fill="none" stroke="#191970" d="M1323.7386,-310.2023C1431.1496,-297.7083 1649.6993,-272.2867 1763.8561,-259.0081"/>
+<polygon fill="#191970" stroke="#191970" points="1764.3724,-262.4717 1773.901,-257.8396 1763.5636,-255.5186 1764.3724,-262.4717"/>
 </g>
-<!-- Node26&#45;&gt;Node28 -->
+<!-- Node30&#45;&gt;Node32 -->
 <g id="edge50" class="edge">
-<title>Node26&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M2284.7806,-367.8006C2366.5485,-359.9689 2514.0989,-344.4422 2649.6915,-322.1291"/>
-<polygon fill="#191970" stroke="#191970" points="2650.5566,-325.5334 2659.8474,-320.4412 2649.4089,-318.6282 2650.5566,-325.5334"/>
+<title>Node30&#45;&gt;Node32</title>
+<path fill="none" stroke="#191970" d="M1261,-307.9005C1261,-299.5099 1261,-286.9451 1261,-275.7085"/>
+<polygon fill="#191970" stroke="#191970" points="1264.5001,-275.5217 1261,-265.5218 1257.5001,-275.5218 1264.5001,-275.5217"/>
 </g>
-<!-- Node29 -->
-<g id="node30" class="node">
-<title>Node29</title>
-<g id="a_node30"><a xlink:href="string_8h.html" target="_top" xlink:title="Runtime String container types. ">
-<polygon fill="#ffffff" stroke="#000000" points="2334,-291.5 2334,-321.5 2460,-321.5 2460,-291.5 2334,-291.5"/>
-<text text-anchor="start" x="2342" y="-309.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/container</text>
-<text text-anchor="middle" x="2397" y="-298.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/string.h</text>
-</a>
-</g>
-</g>
-<!-- Node26&#45;&gt;Node29 -->
+<!-- Node30&#45;&gt;Node33 -->
 <g id="edge54" class="edge">
-<title>Node26&#45;&gt;Node29</title>
-<path fill="none" stroke="#191970" d="M2247.0732,-363.9005C2273.395,-353.8231 2315.443,-337.7247 2348.32,-325.1375"/>
-<polygon fill="#191970" stroke="#191970" points="2349.6765,-328.3659 2357.764,-321.5218 2347.1737,-321.8287 2349.6765,-328.3659"/>
+<title>Node30&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M1323.7783,-314.3135C1542.8833,-303.1923 2273.0472,-266.1309 2507.7038,-254.2203"/>
+<polygon fill="#191970" stroke="#191970" points="2508.0276,-257.7085 2517.8373,-253.706 2507.6727,-250.7175 2508.0276,-257.7085"/>
 </g>
-<!-- Node33 -->
-<g id="node34" class="node">
-<title>Node33</title>
-<g id="a_node34"><a xlink:href="serializer_8h.html" target="_top" xlink:title="Serializer extension to support TVM data types Include this file to enable serialization of DLDataTyp...">
-<polygon fill="#ffffff" stroke="#ff0000" points="2106,-297 2106,-316 2240,-316 2240,-297 2106,-297"/>
-<text text-anchor="middle" x="2173" y="-304" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/serializer.h</text>
-</a>
-</g>
-</g>
-<!-- Node26&#45;&gt;Node33 -->
-<g id="edge72" class="edge">
-<title>Node26&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M2210.0789,-363.9005C2200.6902,-353.751 2188.4022,-337.4941 2180.3369,-324.8679"/>
-<polygon fill="#191970" stroke="#191970" points="2183.2308,-322.8867 2175.1103,-316.0817 2177.2147,-326.4654 2183.2308,-322.8867"/>
-</g>
-<!-- Node26&#45;&gt;Node36 -->
-<g id="edge76" class="edge">
-<title>Node26&#45;&gt;Node36</title>
-<path fill="none" stroke="#191970" d="M2159.1917,-367.7087C2123.1616,-364.5262 2077.0645,-360.6905 2036,-358 1701.2249,-336.066 1617.032,-339.5789 1282,-322 1201.2614,-317.7637 1107.0271,-312.2441 1053.6474,-309.0638"/>
-<polygon fill="#191970" stroke="#191970" points="1053.7231,-305.5622 1043.5323,-308.46 1053.3059,-312.5497 1053.7231,-305.5622"/>
-</g>
-<!-- Node27&#45;&gt;Node14 -->
+<!-- Node31&#45;&gt;Node18 -->
 <g id="edge48" class="edge">
-<title>Node27&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2939.7563,-291.4246C2973.9117,-274.4825 3024.0116,-243.4144 3046,-199 3063.4541,-163.7445 3050.1519,-116.4305 3040.2301,-90.4897"/>
-<polygon fill="#191970" stroke="#191970" points="3043.4125,-89.0236 3036.4109,-81.072 3036.9256,-91.6544 3043.4125,-89.0236"/>
+<title>Node31&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1802.8759,-235.4524C1768.5499,-220.5008 1713.9767,-197.2322 1666,-179 1619.8844,-161.475 1605.6328,-164.0164 1561,-143 1502.52,-115.4634 1497.6093,-89.4652 1437,-67 1410.3734,-57.1307 1226.3121,-30.6567 1150.5105,-20.018"/>
+<polygon fill="#191970" stroke="#191970" points="1150.9493,-16.5454 1140.5607,-18.6253 1149.9789,-23.4778 1150.9493,-16.5454"/>
 </g>
-<!-- Node27&#45;&gt;Node20 -->
+<!-- Node31&#45;&gt;Node24 -->
 <g id="edge49" class="edge">
-<title>Node27&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M2843.5985,-291.4554C2796.0685,-279.8096 2731.7715,-264.0556 2691.6029,-254.2135"/>
-<polygon fill="#191970" stroke="#191970" points="2692.2342,-250.7647 2681.6885,-251.7842 2690.5682,-257.5636 2692.2342,-250.7647"/>
+<title>Node31&#45;&gt;Node24</title>
+<path fill="none" stroke="#191970" d="M1842.6854,-235.2977C1845.745,-227.1166 1849.5498,-216.943 1852.8128,-208.2179"/>
+<polygon fill="#191970" stroke="#191970" points="1856.0927,-209.4394 1856.3174,-198.8469 1849.5362,-206.9873 1856.0927,-209.4394"/>
 </g>
-<!-- Node28&#45;&gt;Node14 -->
+<!-- Node32&#45;&gt;Node18 -->
 <g id="edge51" class="edge">
-<title>Node28&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2778.9063,-291.408C2804.3203,-283.0249 2834.0004,-271.0067 2858,-255 2929.3824,-207.391 2993.1852,-125.5263 3019.3781,-89.4569"/>
-<polygon fill="#191970" stroke="#191970" points="3022.2278,-91.489 3025.2088,-81.3221 3016.5383,-87.411 3022.2278,-91.489"/>
+<title>Node32&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M1231.6528,-235.3229C1215.5958,-226.1483 1195.9818,-213.4468 1181,-199 1159.1673,-177.9469 1153.7325,-170.9687 1142,-143 1126.8884,-106.9759 1121.2369,-61.2421 1119.1633,-35.4899"/>
+<polygon fill="#191970" stroke="#191970" points="1122.6308,-34.9018 1118.4413,-25.1705 1115.6479,-35.3905 1122.6308,-34.9018"/>
 </g>
-<!-- Node28&#45;&gt;Node16 -->
+<!-- Node32&#45;&gt;Node20 -->
 <g id="edge52" class="edge">
-<title>Node28&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2659.8931,-292.2441C2656.8945,-291.7779 2653.9189,-291.3586 2651,-291 2509.917,-273.6667 1469.6901,-250.1603 1267.8652,-245.7345"/>
-<polygon fill="#191970" stroke="#191970" points="1267.6616,-242.2293 1257.5875,-245.5097 1267.5085,-249.2276 1267.6616,-242.2293"/>
+<title>Node32&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M1206.2034,-235.4554C1163.765,-223.8038 1106.3481,-208.0398 1070.5041,-198.1987"/>
+<polygon fill="#191970" stroke="#191970" points="1071.125,-194.7397 1060.5551,-195.4671 1069.2716,-201.4899 1071.125,-194.7397"/>
 </g>
-<!-- Node28&#45;&gt;Node20 -->
+<!-- Node32&#45;&gt;Node24 -->
 <g id="edge53" class="edge">
-<title>Node28&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M2705.9438,-291.2977C2695.8671,-282.3163 2683.096,-270.9334 2672.7389,-261.702"/>
-<polygon fill="#191970" stroke="#191970" points="2674.8417,-258.8879 2665.0478,-254.8469 2670.1841,-264.1135 2674.8417,-258.8879"/>
+<title>Node32&#45;&gt;Node24</title>
+<path fill="none" stroke="#191970" d="M1324.2536,-244.0057C1448.7328,-231.2253 1722.0907,-203.1593 1822.255,-192.8753"/>
+<polygon fill="#191970" stroke="#191970" points="1822.9085,-196.3267 1832.4987,-191.8236 1822.1935,-189.3633 1822.9085,-196.3267"/>
 </g>
-<!-- Node29&#45;&gt;Node5 -->
+<!-- Node33&#45;&gt;Node9 -->
 <g id="edge55" class="edge">
-<title>Node29&#45;&gt;Node5</title>
-<path fill="none" stroke="#191970" d="M2389.7343,-291.2227C2382.4417,-276.402 2370.5203,-253.5028 2358,-235 2351.295,-225.0912 2342.9051,-214.7599 2335.7154,-206.4044"/>
-<polygon fill="#191970" stroke="#191970" points="2338.2618,-203.9998 2329.0328,-198.7964 2333.0025,-208.6194 2338.2618,-203.9998"/>
+<title>Node33&#45;&gt;Node9</title>
+<path fill="none" stroke="#191970" d="M2517.869,-237.3828C2441.7891,-221.8984 2309.9561,-196.0967 2196,-179 2072.6423,-160.4928 1926.9522,-145.4822 1849.6307,-138.0538"/>
+<polygon fill="#191970" stroke="#191970" points="1849.9251,-134.5661 1839.6376,-137.0995 1849.2596,-141.5344 1849.9251,-134.5661"/>
 </g>
-<!-- Node29&#45;&gt;Node6 -->
+<!-- Node33&#45;&gt;Node10 -->
 <g id="edge59" class="edge">
-<title>Node29&#45;&gt;Node6</title>
-<path fill="none" stroke="#191970" d="M2333.9401,-293.1719C2279.8991,-281.4742 2206.421,-264.8353 2178,-255 2158.0818,-248.1071 2155.4106,-240.2589 2135,-235 1910.2332,-177.0876 1841.3273,-245.8643 1614,-199 1556.8644,-187.2213 1493.4197,-162.3191 1456.7738,-146.6958"/>
-<polygon fill="#191970" stroke="#191970" points="1457.7442,-143.3026 1447.1759,-142.5538 1454.9706,-149.7297 1457.7442,-143.3026"/>
+<title>Node33&#45;&gt;Node10</title>
+<path fill="none" stroke="#191970" d="M2549.1984,-235.4572C2513.7759,-219.2763 2454.4508,-193.8729 2401,-179 2254.7255,-138.2986 2214.8597,-140.1519 2064,-123 1817.3669,-94.9592 1522.6557,-83.1144 1390.3611,-78.9372"/>
+<polygon fill="#191970" stroke="#191970" points="1390.0784,-75.4268 1379.9745,-78.6145 1389.8609,-82.4234 1390.0784,-75.4268"/>
 </g>
-<!-- Node29&#45;&gt;Node11 -->
+<!-- Node33&#45;&gt;Node15 -->
 <g id="edge57" class="edge">
-<title>Node29&#45;&gt;Node11</title>
-<path fill="none" stroke="#191970" d="M2460.4558,-292.4904C2509.9553,-280.9213 2571.5472,-264.8543 2579,-255 2584.3619,-247.9104 2581.1104,-243.6347 2579,-235 2571.6405,-204.8889 2526.7189,-126.6804 2505.318,-90.4431"/>
-<polygon fill="#191970" stroke="#191970" points="2508.0703,-88.2229 2499.9563,-81.4101 2502.0508,-91.7959 2508.0703,-88.2229"/>
+<title>Node33&#45;&gt;Node15</title>
+<path fill="none" stroke="#191970" d="M2567.5115,-235.2992C2552.4907,-219.2234 2526.7803,-194.137 2500,-179 2349.4397,-93.8986 2148.7588,-46.8189 2047.0354,-26.9589"/>
+<polygon fill="#191970" stroke="#191970" points="2047.6897,-23.5207 2037.208,-25.0651 2046.365,-30.3942 2047.6897,-23.5207"/>
 </g>
-<!-- Node29&#45;&gt;Node12 -->
+<!-- Node33&#45;&gt;Node16 -->
 <g id="edge65" class="edge">
-<title>Node29&#45;&gt;Node12</title>
-<path fill="none" stroke="#191970" d="M2333.8072,-293.5463C2295.9823,-284.7957 2247.375,-271.7773 2206,-255 2189.0381,-248.122 2187.5443,-240.2162 2170,-235 1953.4671,-170.6213 1882.8459,-250.951 1663,-199 1599.7837,-184.0616 1585.9422,-172.3626 1528,-143 1512.7021,-135.2477 1511.4066,-127.9946 1495,-123 1441.861,-106.8233 589.2548,-78.3044 412.0866,-72.5347"/>
-<polygon fill="#191970" stroke="#191970" points="412.1954,-69.0365 402.087,-72.2099 411.968,-76.0328 412.1954,-69.0365"/>
+<title>Node33&#45;&gt;Node16</title>
+<path fill="none" stroke="#191970" d="M2644.036,-236.1669C2692.2842,-224.5602 2751.7755,-208.6111 2759,-199 2779.359,-171.9157 2733.3146,-150.5778 2753,-123 2805.8817,-48.9165 3103.0665,-23.723 3201.9775,-17.3247"/>
+<polygon fill="#191970" stroke="#191970" points="3202.2167,-20.8167 3211.9785,-16.6987 3201.7793,-13.8304 3202.2167,-20.8167"/>
 </g>
-<!-- Node29&#45;&gt;Node13 -->
+<!-- Node33&#45;&gt;Node17 -->
 <g id="edge68" class="edge">
-<title>Node29&#45;&gt;Node13</title>
-<path fill="none" stroke="#191970" d="M2365.0557,-291.404C2327.7887,-274.54 2263.8831,-247.9165 2206,-235 2012.7715,-191.8816 1956.7964,-234.3669 1762,-199 1672.8603,-182.816 1650.0874,-176.7211 1566,-143 1548.286,-135.8963 1546.3293,-128.3181 1528,-123 1415.9323,-90.4846 1059.1086,-76.7078 932.8219,-72.7674"/>
-<polygon fill="#191970" stroke="#191970" points="932.8309,-69.2661 922.7284,-72.4582 932.6165,-76.2628 932.8309,-69.2661"/>
+<title>Node33&#45;&gt;Node17</title>
+<path fill="none" stroke="#191970" d="M2582.2174,-235.288C2584.1159,-200.4732 2583.6324,-114.3622 2538,-67 2518.3259,-46.5801 2443.8489,-30.8258 2394.7486,-22.3791"/>
+<polygon fill="#191970" stroke="#191970" points="2395.1269,-18.8937 2384.6852,-20.6885 2393.9672,-25.797 2395.1269,-18.8937"/>
 </g>
-<!-- Node29&#45;&gt;Node14 -->
+<!-- Node33&#45;&gt;Node18 -->
 <g id="edge67" class="edge">
-<title>Node29&#45;&gt;Node14</title>
-<path fill="none" stroke="#191970" d="M2460.4159,-293.5556C2465.0096,-292.6729 2469.574,-291.8124 2474,-291 2570.1564,-273.3512 2614.3839,-315.7248 2691,-255 2740.8082,-215.5228 2696.4646,-162.819 2746,-123 2784.6452,-91.9352 2932.5498,-78.2133 2999.1522,-73.5159"/>
-<polygon fill="#191970" stroke="#191970" points="2999.7469,-76.9836 3009.4867,-72.8138 2999.2724,-69.9997 2999.7469,-76.9836"/>
+<title>Node33&#45;&gt;Node18</title>
+<path fill="none" stroke="#191970" d="M2557.1947,-235.4138C2530.5839,-219.1949 2485.7354,-193.7584 2444,-179 2325.1732,-136.9806 2290.3341,-143.6511 2166,-123 1977.1838,-91.6389 1929.4206,-86.3666 1739,-67 1518.669,-44.5914 1462.3978,-52.7422 1242,-31 1211.0176,-27.9436 1175.807,-23.4434 1150.8435,-20.0784"/>
+<polygon fill="#191970" stroke="#191970" points="1151.1951,-16.5941 1140.8144,-18.7133 1150.2509,-23.5301 1151.1951,-16.5941"/>
 </g>
-<!-- Node29&#45;&gt;Node16 -->
+<!-- Node33&#45;&gt;Node20 -->
 <g id="edge69" class="edge">
-<title>Node29&#45;&gt;Node16</title>
-<path fill="none" stroke="#191970" d="M2333.6958,-299.0396C2307.512,-296.1702 2276.8237,-293.0856 2249,-291 1864.4277,-262.1728 1396.4343,-249.0336 1267.9905,-245.8117"/>
-<polygon fill="#191970" stroke="#191970" points="1267.7191,-242.304 1257.6355,-245.5551 1267.5457,-249.3019 1267.7191,-242.304"/>
+<title>Node33&#45;&gt;Node20</title>
+<path fill="none" stroke="#191970" d="M2517.5949,-247.9745C2256.254,-237.5648 1267.119,-198.166 1070.9395,-190.3519"/>
+<polygon fill="#191970" stroke="#191970" points="1070.8048,-186.8438 1060.6734,-189.9429 1070.5261,-193.8383 1070.8048,-186.8438"/>
 </g>
-<!-- Node29&#45;&gt;Node18 -->
+<!-- Node33&#45;&gt;Node22 -->
 <g id="edge60" class="edge">
-<title>Node29&#45;&gt;Node18</title>
-<path fill="none" stroke="#191970" d="M2460.1964,-297.5919C2548.1194,-285.0384 2699.3811,-262.803 2724,-255 2764.2908,-242.2298 2807.5933,-218.9681 2833.7415,-203.7501"/>
-<polygon fill="#191970" stroke="#191970" points="2835.7305,-206.6403 2842.5616,-198.5417 2832.1711,-200.6128 2835.7305,-206.6403"/>
+<title>Node33&#45;&gt;Node22</title>
+<path fill="none" stroke="#191970" d="M2585.0976,-235.277C2590.2854,-218.9383 2600.8147,-193.3975 2619,-179 2639.3933,-162.8544 2706.6315,-148.3485 2751.5751,-140.1171"/>
+<polygon fill="#191970" stroke="#191970" points="2752.4153,-143.5223 2761.6393,-138.31 2751.1781,-136.6325 2752.4153,-143.5223"/>
 </g>
-<!-- Node29&#45;&gt;Node19 -->
+<!-- Node33&#45;&gt;Node23 -->
 <g id="edge64" class="edge">
-<title>Node29&#45;&gt;Node19</title>
-<path fill="none" stroke="#191970" d="M2333.9903,-299.8537C2307.7165,-297.0946 2276.8834,-293.8727 2249,-291 2095.521,-275.1878 1912.8063,-256.8112 1834.1669,-248.9236"/>
-<polygon fill="#191970" stroke="#191970" points="1834.5155,-245.441 1824.2162,-247.9258 1833.817,-252.4061 1834.5155,-245.441"/>
+<title>Node33&#45;&gt;Node23</title>
+<path fill="none" stroke="#191970" d="M2517.9328,-242.6485C2414.4341,-229.7636 2211.5336,-204.5037 2126.243,-193.8855"/>
+<polygon fill="#191970" stroke="#191970" points="2126.5561,-190.3976 2116.2003,-192.6353 2125.6913,-197.3439 2126.5561,-190.3976"/>
 </g>
-<!-- Node29&#45;&gt;Node20 -->
+<!-- Node33&#45;&gt;Node24 -->
 <g id="edge56" class="edge">
-<title>Node29&#45;&gt;Node20</title>
-<path fill="none" stroke="#191970" d="M2459.8693,-291.4554C2508.9127,-279.7193 2575.3913,-263.811 2616.4504,-253.9856"/>
-<polygon fill="#191970" stroke="#191970" points="2617.4045,-257.3562 2626.3154,-251.6249 2615.7754,-250.5484 2617.4045,-257.3562"/>
+<title>Node33&#45;&gt;Node24</title>
+<path fill="none" stroke="#191970" d="M2517.6285,-244.986C2483.3904,-242.0136 2440.3822,-238.2906 2402,-235 2212.5807,-218.7608 1985.8974,-199.6154 1897.6977,-192.1771"/>
+<polygon fill="#191970" stroke="#191970" points="1897.8987,-188.6817 1887.6399,-191.329 1897.3105,-195.657 1897.8987,-188.6817"/>
 </g>
-<!-- Node29&#45;&gt;Node21 -->
+<!-- Node33&#45;&gt;Node25 -->
 <g id="edge58" class="edge">
-<title>Node29&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M2385.873,-291.4069C2372.3874,-274.4124 2347.9655,-247.538 2320,-235 2248.642,-203.0076 2027.3128,-193.2323 1909.9054,-190.2704"/>
-<polygon fill="#191970" stroke="#191970" points="1909.6477,-186.7632 1899.5659,-190.0204 1909.4784,-193.7612 1909.6477,-186.7632"/>
+<title>Node33&#45;&gt;Node25</title>
+<path fill="none" stroke="#191970" d="M2517.8557,-244.8235C2441.2074,-237.2977 2308.2686,-222.1997 2196,-199 2163.8947,-192.3656 2157.2301,-184.999 2125,-179 1959.6885,-148.2302 1913.0525,-162.082 1742.9107,-143.1702"/>
+<polygon fill="#191970" stroke="#191970" points="1743.1912,-139.6797 1732.8606,-142.0312 1742.4028,-146.6351 1743.1912,-139.6797"/>
 </g>
-<!-- Node29&#45;&gt;Node23 -->
+<!-- Node33&#45;&gt;Node27 -->
 <g id="edge63" class="edge">
-<title>Node29&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M2400.5298,-291.1389C2405.3943,-269.9692 2414.1611,-231.8174 2419.4936,-208.6112"/>
-<polygon fill="#191970" stroke="#191970" points="2422.9749,-209.0893 2421.8034,-198.5595 2416.1527,-207.5216 2422.9749,-209.0893"/>
+<title>Node33&#45;&gt;Node27</title>
+<path fill="none" stroke="#191970" d="M2539.4729,-235.4207C2493.3038,-219.2078 2416.235,-193.7767 2348,-179 2226.2352,-152.6312 2193.1367,-161.9561 2070,-143 2068.5358,-142.7746 2067.051,-142.5413 2065.5529,-142.3018"/>
+<polygon fill="#191970" stroke="#191970" points="2065.9899,-138.8266 2055.553,-140.6487 2064.8482,-145.7328 2065.9899,-138.8266"/>
 </g>
-<!-- Node30 -->
+<!-- Node34 -->
 <g id="node31" class="node">
-<title>Node30</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2516.5,-235.5 2516.5,-254.5 2569.5,-254.5 2569.5,-235.5 2516.5,-235.5"/>
-<text text-anchor="middle" x="2543" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstddef</text>
+<title>Node34</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2628.5,-179.5 2628.5,-198.5 2681.5,-198.5 2681.5,-179.5 2628.5,-179.5"/>
+<text text-anchor="middle" x="2655" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstddef</text>
 </g>
-<!-- Node29&#45;&gt;Node30 -->
+<!-- Node33&#45;&gt;Node34 -->
 <g id="edge61" class="edge">
-<title>Node29&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M2432.7156,-291.4554C2456.6487,-281.374 2487.8905,-268.2139 2511.0094,-258.4755"/>
-<polygon fill="#191970" stroke="#191970" points="2512.5181,-261.6379 2520.3752,-254.5303 2509.8007,-255.1868 2512.5181,-261.6379"/>
+<title>Node33&#45;&gt;Node34</title>
+<path fill="none" stroke="#191970" d="M2599.2921,-235.2977C2610.2061,-226.2274 2624.0671,-214.7077 2635.2324,-205.4285"/>
+<polygon fill="#191970" stroke="#191970" points="2637.698,-207.9303 2643.1517,-198.8469 2633.2238,-202.5468 2637.698,-207.9303"/>
 </g>
-<!-- Node31 -->
+<!-- Node35 -->
 <g id="node32" class="node">
-<title>Node31</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2448,-235.5 2448,-254.5 2498,-254.5 2498,-235.5 2448,-235.5"/>
-<text text-anchor="middle" x="2473" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstring</text>
+<title>Node35</title>
+<polygon fill="#ffffff" stroke="#bfbfbf" points="2700,-179.5 2700,-198.5 2750,-198.5 2750,-179.5 2700,-179.5"/>
+<text text-anchor="middle" x="2725" y="-186.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">cstring</text>
 </g>
-<!-- Node29&#45;&gt;Node31 -->
+<!-- Node33&#45;&gt;Node35 -->
 <g id="edge62" class="edge">
-<title>Node29&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M2415.7865,-291.2977C2427.1053,-282.1384 2441.5105,-270.4816 2453.0346,-261.1562"/>
-<polygon fill="#191970" stroke="#191970" points="2455.2595,-263.8583 2460.8314,-254.8469 2450.8561,-258.4167 2455.2595,-263.8583"/>
-</g>
-<!-- Node32 -->
-<g id="node33" class="node">
-<title>Node32</title>
-<polygon fill="#ffffff" stroke="#bfbfbf" points="2032.5,-235.5 2032.5,-254.5 2125.5,-254.5 2125.5,-235.5 2032.5,-235.5"/>
-<text text-anchor="middle" x="2079" y="-242.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">unordered_map</text>
+<title>Node33&#45;&gt;Node35</title>
+<path fill="none" stroke="#191970" d="M2616.2264,-235.4554C2639.8316,-225.374 2670.6454,-212.2139 2693.4476,-202.4755"/>
+<polygon fill="#191970" stroke="#191970" points="2694.8634,-205.6767 2702.6851,-198.5303 2692.114,-199.2393 2694.8634,-205.6767"/>
 </g>
-<!-- Node29&#45;&gt;Node32 -->
+<!-- Node33&#45;&gt;Node36 -->
 <g id="edge66" class="edge">
-<title>Node29&#45;&gt;Node32</title>
-<path fill="none" stroke="#191970" d="M2333.9781,-293.7602C2329.2491,-292.8206 2324.5497,-291.8921 2320,-291 2256.9764,-278.6423 2184.592,-264.8846 2135.7348,-255.6627"/>
-<polygon fill="#191970" stroke="#191970" points="2136.1537,-252.18 2125.6782,-253.7658 2134.8561,-259.0587 2136.1537,-252.18"/>
+<title>Node33&#45;&gt;Node36</title>
+<path fill="none" stroke="#191970" d="M2644.015,-245.1766C2783.891,-233.3602 3118.4886,-205.094 3252.2903,-193.7907"/>
+<polygon fill="#191970" stroke="#191970" points="3252.6621,-197.2719 3262.3319,-192.9424 3252.0728,-190.2967 3252.6621,-197.2719"/>
 </g>
-<!-- Node33&#45;&gt;Node7 -->
-<g id="edge73" class="edge">
-<title>Node33&#45;&gt;Node7</title>
-<path fill="none" stroke="#191970" d="M2105.661,-296.9866C2077.1354,-289.7895 2045.5759,-277.1586 2024,-255 2018.2998,-249.1459 1987.4369,-145.0651 1973.201,-96.3136"/>
-<polygon fill="#191970" stroke="#191970" points="1976.51,-95.1582 1970.3524,-86.5365 1969.7894,-97.1163 1976.51,-95.1582"/>
-</g>
-<!-- Node33&#45;&gt;Node26 -->
-<g id="edge74" class="edge">
-<title>Node33&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M2184.9046,-316.0817C2194.2899,-326.2241 2206.5787,-342.4795 2214.649,-355.1099"/>
-<polygon fill="#191970" stroke="#191970" points="2211.7585,-357.0969 2219.8801,-363.9005 2217.7739,-353.5171 2211.7585,-357.0969"/>
... 64594 lines suppressed ...