You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2022/09/09 21:57:11 UTC

[tvm-site] branch asf-site updated: deploying docs (apache/tvm@029fa462d22ce3c75bc5ea530eece999a160c05b)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 0d19f999e deploying docs (apache/tvm@029fa462d22ce3c75bc5ea530eece999a160c05b)
0d19f999e is described below

commit 0d19f999e4cf7d754524d6dddbe904fb69fe3a65
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Fri Sep 9 21:57:02 2022 +0000

    deploying docs (apache/tvm@029fa462d22ce3c75bc5ea530eece999a160c05b)
---
 .../how_to/compile_models/from_darknet.rst.txt     |    2 +-
 .../how_to/compile_models/from_keras.rst.txt       |    2 +-
 .../how_to/compile_models/from_mxnet.rst.txt       |    2 +-
 .../how_to/compile_models/from_oneflow.rst.txt     |    2 +-
 .../how_to/compile_models/from_pytorch.rst.txt     |    2 +-
 .../how_to/compile_models/from_tensorflow.rst.txt  |    2 +-
 .../compile_models/sg_execution_times.rst.txt      |   22 +-
 .../deploy_models/deploy_model_on_android.rst.txt  |    2 +-
 .../deploy_object_detection_pytorch.rst.txt        |    4 +-
 .../deploy_models/deploy_prequantized.rst.txt      |    6 +-
 .../deploy_prequantized_tflite.rst.txt             |    4 +-
 .../how_to/deploy_models/deploy_quantized.rst.txt  |    2 +-
 .../deploy_models/deploy_ssd_gluoncv.rst.txt       |    4 +-
 .../deploy_models/sg_execution_times.rst.txt       |   18 +-
 .../extend_tvm/bring_your_own_datatypes.rst.txt    |    2 +-
 .../how_to/extend_tvm/sg_execution_times.rst.txt   |    8 +-
 .../how_to/extend_tvm/use_pass_instrument.rst.txt  |   16 +-
 .../optimize_operators/opt_conv_cuda.rst.txt       |    2 +-
 .../optimize_operators/opt_conv_tensorcore.rst.txt |    2 +-
 .../how_to/optimize_operators/opt_gemm.rst.txt     |   16 +-
 .../optimize_operators/sg_execution_times.rst.txt  |    8 +-
 .../sg_execution_times.rst.txt                     |   14 +-
 .../tune_conv2d_layer_cuda.rst.txt                 | 1014 ++-
 .../tune_network_cuda.rst.txt                      |    2 +-
 .../tune_network_x86.rst.txt                       |    4 +-
 .../tune_sparse_x86.rst.txt                        |   35 +-
 .../tune_with_autotvm/sg_execution_times.rst.txt   |    6 +-
 .../tune_with_autotvm/tune_conv2d_cuda.rst.txt     |   26 +-
 .../work_with_microtvm/micro_autotune.rst.txt      |   16 +-
 .../how_to/work_with_microtvm/micro_train.rst.txt  |   16 +-
 .../work_with_microtvm/sg_execution_times.rst.txt  |   10 +-
 .../work_with_relay/sg_execution_times.rst.txt     |    8 +-
 .../how_to/work_with_schedules/intrin_math.rst.txt |    2 +-
 .../work_with_schedules/sg_execution_times.rst.txt |   14 +-
 .../how_to/work_with_schedules/tensorize.rst.txt   |    2 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    4 +-
 .../frontend/deploy_classification.rst.txt         |    2 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    6 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../topic/vta/tutorials/sg_execution_times.rst.txt |    6 +-
 .../tutorial/auto_scheduler_matmul_x86.rst.txt     |   14 +-
 docs/_sources/tutorial/autotvm_matmul_x86.rst.txt  |   20 +-
 docs/_sources/tutorial/autotvm_relay_x86.rst.txt   |   54 +-
 .../tutorial/cross_compilation_and_rpc.rst.txt     |    2 +-
 docs/_sources/tutorial/intro_topi.rst.txt          |    2 +-
 docs/_sources/tutorial/sg_execution_times.rst.txt  |   26 +-
 .../tutorial/tensor_expr_get_started.rst.txt       |   45 +-
 docs/commit_hash                                   |    2 +-
 docs/how_to/compile_models/from_darknet.html       |    2 +-
 docs/how_to/compile_models/from_keras.html         |    2 +-
 docs/how_to/compile_models/from_mxnet.html         |    2 +-
 docs/how_to/compile_models/from_oneflow.html       |   13 +-
 docs/how_to/compile_models/from_pytorch.html       |    7 +-
 docs/how_to/compile_models/from_tensorflow.html    |    2 +-
 docs/how_to/compile_models/sg_execution_times.html |   22 +-
 .../deploy_models/deploy_model_on_android.html     |    2 +-
 .../deploy_object_detection_pytorch.html           |   43 +-
 docs/how_to/deploy_models/deploy_prequantized.html |    8 +-
 .../deploy_models/deploy_prequantized_tflite.html  |    4 +-
 docs/how_to/deploy_models/deploy_quantized.html    |    2 +-
 docs/how_to/deploy_models/deploy_ssd_gluoncv.html  |   37 +-
 docs/how_to/deploy_models/sg_execution_times.html  |   18 +-
 .../extend_tvm/bring_your_own_datatypes.html       |    2 +-
 docs/how_to/extend_tvm/sg_execution_times.html     |    8 +-
 docs/how_to/extend_tvm/use_pass_instrument.html    |   16 +-
 docs/how_to/optimize_operators/opt_conv_cuda.html  |    2 +-
 .../optimize_operators/opt_conv_tensorcore.html    |    2 +-
 docs/how_to/optimize_operators/opt_gemm.html       |   16 +-
 .../optimize_operators/sg_execution_times.html     |    8 +-
 .../sg_execution_times.html                        |   14 +-
 .../tune_conv2d_layer_cuda.html                    | 1014 ++-
 .../tune_with_autoscheduler/tune_network_cuda.html |    2 +-
 .../tune_with_autoscheduler/tune_network_x86.html  |    4 +-
 .../tune_with_autoscheduler/tune_sparse_x86.html   |   35 +-
 .../tune_with_autotvm/sg_execution_times.html      |    6 +-
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.html |   26 +-
 docs/how_to/work_with_microtvm/micro_autotune.html |   16 +-
 docs/how_to/work_with_microtvm/micro_train.html    |   16 +-
 .../work_with_microtvm/sg_execution_times.html     |   10 +-
 .../how_to/work_with_relay/sg_execution_times.html |    8 +-
 docs/how_to/work_with_schedules/intrin_math.html   |    2 +-
 .../work_with_schedules/sg_execution_times.html    |   14 +-
 docs/how_to/work_with_schedules/tensorize.html     |    2 +-
 docs/install/nnpack.html                           |   12 +-
 ..._2tvm_2script_2ir_builder_2base_8h-example.html |    4 +-
 docs/reference/api/doxygen/annotated.html          |   19 +-
 docs/reference/api/doxygen/array_8h__dep__incl.svg |  390 +-
 .../api/doxygen/attr__registry__map_8h.html        |    2 +-
 .../doxygen/attr__registry__map_8h__dep__incl.svg  | 1046 ++-
 .../auto__scheduler_2feature_8h_source.html        |    3 +-
 .../api/doxygen/c__runtime__api_8h__dep__incl.svg  |  570 +-
 docs/reference/api/doxygen/classes.html            |  479 +-
 .../doxygen/classtvm_1_1runtime_1_1ObjectRef.html  |    2 +-
 ...asstvm_1_1runtime_1_1ObjectRef__coll__graph.svg |   12 +-
 .../classtvm_1_1runtime_1_1Object__coll__graph.svg |    8 +-
 ...1_1script_1_1ir__builder_1_1IRBuilderFrame.html |    2 +-
 ...cript_1_1ir__builder_1_1IRBuilderFrameNode.html |    4 +-
 ..._builder_1_1IRBuilderFrameNode__coll__graph.svg |   16 +-
 ...ilder_1_1IRBuilderFrameNode__inherit__graph.svg |  216 +-
 ...1ir__builder_1_1IRBuilderFrame__coll__graph.svg |   20 +-
 ...__builder_1_1IRBuilderFrame__inherit__graph.svg |  151 +-
 ..._1_1script_1_1ir__builder_1_1IRBuilderNode.html |    2 +-
 ...r__builder_1_1ir_1_1IRModuleFrame-members.html} |    8 +-
 ...ipt_1_1ir__builder_1_1ir_1_1IRModuleFrame.html} |   38 +-
 ...uilder_1_1ir_1_1IRModuleFrameNode-members.html} |   18 +-
 ...1_1ir__builder_1_1ir_1_1IRModuleFrameNode.html} |   84 +-
 ...er_1_1ir_1_1IRModuleFrameNode__coll__graph.svg} |    6 +-
 ...1_1ir_1_1IRModuleFrameNode__inherit__graph.svg} |    6 +-
 ...uilder_1_1ir_1_1IRModuleFrame__coll__graph.svg} |    6 +-
 ...der_1_1ir_1_1IRModuleFrame__inherit__graph.svg} |    6 +-
 ...builder_1_1tir_1_1AssertFrameNode-members.html} |   34 +-
 ..._1_1ir__builder_1_1tir_1_1AssertFrameNode.html} |  112 +-
 ...lder_1_1tir_1_1AssertFrameNode__coll__graph.svg |  276 +
 ...r_1_1tir_1_1AssertFrameNode__inherit__graph.svg |  126 +
 ...__builder_1_1tir_1_1PrimFuncFrame-members.html} |   10 +-
 ...pt_1_1ir__builder_1_1tir_1_1PrimFuncFrame.html} |   50 +-
 ...ilder_1_1tir_1_1PrimFuncFrameNode-members.html} |   52 +-
 ..._1ir__builder_1_1tir_1_1PrimFuncFrameNode.html} |  226 +-
 ...er_1_1tir_1_1PrimFuncFrameNode__coll__graph.svg |  490 ++
 ...1_1tir_1_1PrimFuncFrameNode__inherit__graph.svg |  132 +
 ...uilder_1_1tir_1_1PrimFuncFrame__coll__graph.svg |  140 +
 ...der_1_1tir_1_1PrimFuncFrame__inherit__graph.svg |  110 +
 ...1_1ir__builder_1_1tir_1_1TIRFrame-members.html} |   17 +-
 ...1script_1_1ir__builder_1_1tir_1_1TIRFrame.html} |   90 +-
 ...r__builder_1_1tir_1_1TIRFrameNode-members.html} |   39 +-
 ...ipt_1_1ir__builder_1_1tir_1_1TIRFrameNode.html} |  123 +-
 ...builder_1_1tir_1_1TIRFrameNode__coll__graph.svg |  203 +
 ...lder_1_1tir_1_1TIRFrameNode__inherit__graph.svg |  156 +
 ...r__builder_1_1tir_1_1TIRFrame__coll__graph.svg} |  163 +-
 ..._builder_1_1tir_1_1TIRFrame__inherit__graph.svg |  110 +
 .../api/doxygen/data__type_8h__dep__incl.svg       |  660 +-
 .../api/doxygen/dataflow__matcher_8h_source.html   |    4 +-
 .../api/doxygen/detail_2broadcast_8h_source.html   |    2 +-
 .../api/doxygen/detail_2extern_8h_source.html      |    4 +-
 .../api/doxygen/diagnostic_8h__dep__incl.svg       |   48 +-
 docs/reference/api/doxygen/dilate_8h_source.html   |    2 +-
 docs/reference/api/doxygen/dir_000003_000028.html  |   73 -
 docs/reference/api/doxygen/dir_000003_000029.html  |    4 +-
 ...r_000003_000029.html => dir_000003_000030.html} |    0
 ...r_000004_000028.html => dir_000004_000029.html} |    0
 docs/reference/api/doxygen/dir_000005_000028.html  |   73 -
 docs/reference/api/doxygen/dir_000005_000029.html  |    4 +-
 ...r_000005_000029.html => dir_000005_000030.html} |    0
 docs/reference/api/doxygen/dir_000006_000028.html  |   73 -
 docs/reference/api/doxygen/dir_000006_000029.html  |    4 +-
 ...r_000006_000029.html => dir_000006_000030.html} |    0
 ...r_000007_000028.html => dir_000007_000029.html} |    0
 ...r_000011_000028.html => dir_000011_000029.html} |    0
 ...r_000014_000028.html => dir_000014_000029.html} |    0
 ...r_000015_000028.html => dir_000015_000029.html} |    0
 docs/reference/api/doxygen/dir_000016_000028.html  |   73 -
 docs/reference/api/doxygen/dir_000016_000029.html  |    4 +-
 ...r_000016_000029.html => dir_000016_000030.html} |    0
 docs/reference/api/doxygen/dir_000020_000007.html  |    2 +-
 ...r_000028_000011.html => dir_000020_000011.html} |    6 +-
 docs/reference/api/doxygen/dir_000020_000017.html  |    2 +-
 docs/reference/api/doxygen/dir_000021_000007.html  |    2 +-
 ...r_000027_000008.html => dir_000021_000011.html} |    6 +-
 docs/reference/api/doxygen/dir_000021_000017.html  |    2 +-
 docs/reference/api/doxygen/dir_000026_000007.html  |    2 +-
 docs/reference/api/doxygen/dir_000026_000017.html  |    2 +-
 ...r_000027_000013.html => dir_000027_000011.html} |    6 +-
 docs/reference/api/doxygen/dir_000027_000017.html  |   73 -
 ...r_000027_000007.html => dir_000027_000026.html} |    6 +-
 docs/reference/api/doxygen/dir_000028_000007.html  |    6 +-
 docs/reference/api/doxygen/dir_000028_000008.html  |    6 +-
 docs/reference/api/doxygen/dir_000028_000013.html  |    6 +-
 docs/reference/api/doxygen/dir_000028_000017.html  |    6 +-
 ...r_000028_000007.html => dir_000029_000007.html} |    0
 docs/reference/api/doxygen/dir_000029_000008.html  |    6 +-
 docs/reference/api/doxygen/dir_000029_000011.html  |    6 +-
 docs/reference/api/doxygen/dir_000029_000013.html  |    6 +-
 ...r_000028_000017.html => dir_000029_000017.html} |    0
 ...r_000029_000002.html => dir_000030_000002.html} |    0
 ...r_000029_000008.html => dir_000030_000008.html} |    0
 ...r_000029_000011.html => dir_000030_000011.html} |    0
 docs/reference/api/doxygen/dir_000030_000013.html  |    6 +-
 docs/reference/api/doxygen/dir_000031_000002.html  |   73 -
 ...r_000030_000007.html => dir_000031_000007.html} |    0
 ...r_000030_000013.html => dir_000031_000013.html} |    0
 ...r_000030_000017.html => dir_000031_000017.html} |    0
 docs/reference/api/doxygen/dir_000032_000002.html  |    6 +-
 ...r_000031_000008.html => dir_000032_000008.html} |    0
 ...r_000032_000002.html => dir_000033_000002.html} |    0
 ...r_000032_000011.html => dir_000033_000011.html} |    0
 ...r_000032_000029.html => dir_000033_000030.html} |    0
 docs/reference/api/doxygen/dir_000033_000032.html  |   73 -
 docs/reference/api/doxygen/dir_000034_000028.html  |   73 -
 ...r_000033_000029.html => dir_000034_000030.html} |    0
 docs/reference/api/doxygen/dir_000034_000033.html  |    6 +-
 docs/reference/api/doxygen/dir_000035_000029.html  |    6 +-
 ...r_000034_000029.html => dir_000035_000030.html} |    0
 docs/reference/api/doxygen/dir_000035_000032.html  |   73 -
 ...r_000034_000032.html => dir_000035_000033.html} |    0
 ...r_000034_000033.html => dir_000035_000034.html} |    0
 ...r_000034_000035.html => dir_000035_000036.html} |    0
 ...r_000034_000037.html => dir_000035_000038.html} |    0
 ...r_000035_000002.html => dir_000036_000002.html} |    0
 docs/reference/api/doxygen/dir_000036_000028.html  |   73 -
 ...r_000035_000029.html => dir_000036_000030.html} |    0
 docs/reference/api/doxygen/dir_000036_000033.html  |    6 +-
 docs/reference/api/doxygen/dir_000037_000028.html  |   73 -
 docs/reference/api/doxygen/dir_000037_000029.html  |    6 +-
 ...r_000036_000029.html => dir_000037_000030.html} |    0
 ...r_000036_000032.html => dir_000037_000033.html} |    0
 ...r_000036_000033.html => dir_000037_000034.html} |    0
 ...r_000036_000034.html => dir_000037_000035.html} |    0
 ...r_000036_000035.html => dir_000037_000036.html} |    0
 ...r_000036_000037.html => dir_000037_000038.html} |    0
 docs/reference/api/doxygen/dir_000038_000028.html  |   73 -
 docs/reference/api/doxygen/dir_000038_000029.html  |    6 +-
 ...r_000037_000029.html => dir_000038_000030.html} |    0
 ...r_000037_000032.html => dir_000038_000033.html} |    0
 docs/reference/api/doxygen/dir_000039_000029.html  |    6 +-
 ...r_000038_000029.html => dir_000039_000030.html} |    0
 ...r_000038_000032.html => dir_000039_000033.html} |    0
 ...r_000039_000029.html => dir_000040_000030.html} |    0
 ...r_000039_000032.html => dir_000040_000033.html} |    0
 .../dir_006b1f4ac353a18abb55f74cc4796db6_dep.svg   |    6 +-
 .../dir_02be2c9d68e402f80df60bd528724ee5_dep.svg   |   22 +-
 .../dir_05ffda4d144d7985f926507abde48dbb_dep.svg   |   12 +-
 .../dir_1f1b12d204a071c9e67e47fcbb552b86_dep.svg   |   10 +-
 .../dir_2b0ef9f1c86b565a92e96353e1195b2c_dep.svg   |    8 +-
 .../dir_3a038e7bfa2370c6aee2a5aecd5d3ef1_dep.svg   |   12 +-
 .../dir_4378f18824ae7d4ad48f8d7785cd7ac8_dep.svg   |   16 +-
 .../dir_519be2d4a83a987dbf989f1de527b870_dep.svg   |   10 +-
 .../dir_54983dd6d74c59f67ee9e8e5a50aafc4_dep.svg   |   42 +-
 .../dir_5da96592f3a7c442b838b075c58254c2_dep.svg   |   14 +-
 .../dir_63946bee875c6d52bce55e72a67a86ad_dep.svg   |   20 +-
 ...l => dir_67fdee7a5e0396034822418fa5baa4b4.html} |   14 +-
 .../dir_67fdee7a5e0396034822418fa5baa4b4_dep.svg   |   68 +
 .../dir_72c2f11201cd7636dc7624de0754daa5_dep.svg   |   22 +-
 .../dir_8395ded0a3205c0748976a0d4487d38d_dep.svg   |    8 +-
 .../dir_84875704194fd544d29fe0c7fedd8939.html      |    2 +-
 .../dir_84875704194fd544d29fe0c7fedd8939_dep.svg   |  201 +-
 .../dir_8e4e25e66b8623d88c5b5dd2040bca97_dep.svg   |   74 +-
 .../dir_9e615ec4a59e46584bcc4e2226e148a2.html      |    4 +-
 .../dir_9e615ec4a59e46584bcc4e2226e148a2_dep.svg   |  112 +-
 .../dir_a59a89c7dd2e4e6561fe59bf359ce2f3_dep.svg   |    8 +-
 .../dir_a98464176f1216e334ac3bbacd433085_dep.svg   |   16 +-
 .../dir_ac57496531ccbad72f774fa62e6de987_dep.svg   |   28 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5.html      |    2 +-
 .../dir_b4c7d8e826c599ba55146c099a14beb5_dep.svg   |  556 +-
 .../dir_d331277d4303e21ded95616eb56c1a9e_dep.svg   |    6 +-
 .../dir_d3953cf7eb33eca56fc6850c0e98447d_dep.svg   |    6 +-
 .../dir_d4a54fa981698f72ef4cd62f8b9e1a8f_dep.svg   |    4 +-
 .../dir_dc867ff9a37cad1764f1670dc7eba6c1_dep.svg   |   12 +-
 .../dir_e4a1a856a30057b9b1543256279fc7a1.html      |    2 +-
 .../dir_f97d855a3173728370e632aa77170e34_dep.svg   |    8 +-
 .../api/doxygen/driver__api_8h_source.html         |    4 +-
 docs/reference/api/doxygen/elemwise_8h_source.html |    2 +-
 docs/reference/api/doxygen/env__func_8h.html       |    2 +-
 .../api/doxygen/env__func_8h__dep__incl.svg        |  920 ++-
 docs/reference/api/doxygen/files.html              |    7 +-
 docs/reference/api/doxygen/functions__.html        |    5 +-
 docs/reference/api/doxygen/functions_a.html        |   10 +-
 docs/reference/api/doxygen/functions_b.html        |    5 +-
 docs/reference/api/doxygen/functions_c.html        |    9 +-
 docs/reference/api/doxygen/functions_e.html        |   11 +-
 docs/reference/api/doxygen/functions_f.html        |    2 +-
 docs/reference/api/doxygen/functions_func_e.html   |    4 +-
 docs/reference/api/doxygen/functions_func_m.html   |    2 +-
 docs/reference/api/doxygen/functions_func_t.html   |   18 +-
 docs/reference/api/doxygen/functions_func_u.html   |    2 +-
 docs/reference/api/doxygen/functions_func_v.html   |   33 +-
 docs/reference/api/doxygen/functions_g.html        |    2 +-
 docs/reference/api/doxygen/functions_l.html        |    6 +-
 docs/reference/api/doxygen/functions_m.html        |    3 +-
 docs/reference/api/doxygen/functions_n.html        |    3 +-
 docs/reference/api/doxygen/functions_p.html        |    5 +-
 docs/reference/api/doxygen/functions_r.html        |    8 +-
 docs/reference/api/doxygen/functions_s.html        |   13 +-
 docs/reference/api/doxygen/functions_t.html        |   18 +-
 docs/reference/api/doxygen/functions_v.html        |   43 +-
 docs/reference/api/doxygen/functions_vars.html     |    5 +-
 docs/reference/api/doxygen/functions_vars_a.html   |    2 +
 docs/reference/api/doxygen/functions_vars_b.html   |    3 +-
 docs/reference/api/doxygen/functions_vars_c.html   |    1 +
 docs/reference/api/doxygen/functions_vars_e.html   |    3 +-
 docs/reference/api/doxygen/functions_vars_f.html   |    2 +-
 docs/reference/api/doxygen/functions_vars_g.html   |    2 +-
 docs/reference/api/doxygen/functions_vars_m.html   |    1 +
 docs/reference/api/doxygen/functions_vars_n.html   |    1 +
 docs/reference/api/doxygen/functions_vars_p.html   |    3 +-
 docs/reference/api/doxygen/functions_vars_r.html   |    4 +
 docs/reference/api/doxygen/functions_vars_s.html   |    3 +-
 .../api/doxygen/functor_8h__dep__incl.svg          |  520 +-
 docs/reference/api/doxygen/hierarchy.html          |  768 +--
 docs/reference/api/doxygen/inherit_graph_11.svg    |   16 +-
 docs/reference/api/doxygen/inherit_graph_116.svg   | 6350 +++++++++---------
 docs/reference/api/doxygen/inherit_graph_125.svg   | 6745 ++++++++++----------
 docs/reference/api/doxygen/inherit_graph_214.svg   |   16 +-
 docs/reference/api/doxygen/inherit_graph_215.svg   |   16 +-
 docs/reference/api/doxygen/inherit_graph_41.svg    |   16 +-
 docs/reference/api/doxygen/inherit_graph_45.svg    |    8 +-
 docs/reference/api/doxygen/inherits.html           |    4 +-
 .../api/doxygen/instrument_8h_source.html          |    4 +-
 .../api/doxygen/ir_2adt_8h__dep__incl.svg          |  180 +-
 .../api/doxygen/ir_2attrs_8h__dep__incl.svg        |  242 +-
 .../api/doxygen/ir_2expr_8h__dep__incl.svg         |  330 +-
 docs/reference/api/doxygen/ir_2function_8h.html    |    2 +-
 .../api/doxygen/ir_2function_8h__dep__incl.svg     | 1248 ++--
 .../api/doxygen/{ir_8h.html => ir_2ir_8h.html}     |   12 +-
 .../{ir_8h__incl.svg => ir_2ir_8h__incl.svg}       |    0
 docs/reference/api/doxygen/ir_2ir_8h_source.html   |   83 +
 .../api/doxygen/ir_2module_8h__dep__incl.svg       |  180 +-
 .../api/doxygen/ir_2module_8h_source.html          |    4 +-
 docs/reference/api/doxygen/ir_2op_8h.html          |    2 +-
 .../reference/api/doxygen/ir_2op_8h__dep__incl.svg |  965 ++-
 .../api/doxygen/ir_2span_8h__dep__incl.svg         |  418 +-
 .../api/doxygen/ir_2transform_8h_source.html       |    4 +-
 .../api/doxygen/ir_2type_8h__dep__incl.svg         |  478 +-
 docs/reference/api/doxygen/ir_8h_source.html       |   83 -
 .../api/doxygen/ir__builder_2ir_2frame_8h.html     |   12 +-
 .../ir__builder_2ir_2frame_8h__dep__incl.svg       |   58 +-
 .../doxygen/ir__builder_2ir_2frame_8h_source.html  |   18 +-
 ...ase_8h.html => ir__builder_2tir_2frame_8h.html} |   41 +-
 .../ir__builder_2tir_2frame_8h__dep__incl.svg      |   36 +
 .../doxygen/ir__builder_2tir_2frame_8h__incl.svg   | 1575 +++++
 .../doxygen/ir__builder_2tir_2frame_8h_source.html |  113 +
 .../doxygen/local__response__norm_8h_source.html   |    2 +-
 docs/reference/api/doxygen/map_8h__dep__incl.svg   |  470 +-
 docs/reference/api/doxygen/namespacemembers_e.html |    5 +-
 .../api/doxygen/namespacemembers_func_e.html       |    3 +
 .../api/doxygen/namespacemembers_func_i.html       |    2 +-
 .../api/doxygen/namespacemembers_func_m.html       |    8 +-
 .../api/doxygen/namespacemembers_func_p.html       |    5 +-
 .../api/doxygen/namespacemembers_func_s.html       |    6 +-
 docs/reference/api/doxygen/namespacemembers_i.html |    2 +-
 docs/reference/api/doxygen/namespacemembers_m.html |   14 +-
 docs/reference/api/doxygen/namespacemembers_p.html |    3 +
 docs/reference/api/doxygen/namespacemembers_s.html |    6 +-
 docs/reference/api/doxygen/namespaces.html         |    2 +
 .../namespacetvm_1_1script_1_1ir__builder.html     |   40 +-
 ...mespacetvm_1_1script_1_1ir__builder_1_1ir.html} |   45 +-
 ...espacetvm_1_1script_1_1ir__builder_1_1tir.html} |   79 +-
 .../reference/api/doxygen/namespacetvm_1_1tir.html |   49 +-
 .../api/doxygen/ndarray_8h__dep__incl.svg          |  556 +-
 docs/reference/api/doxygen/nn_2bnn_8h_source.html  |    2 +-
 .../api/doxygen/nn_2pooling_8h_source.html         |    2 +-
 docs/reference/api/doxygen/node_8h__dep__incl.svg  |  522 +-
 .../reference/api/doxygen/object_8h__dep__incl.svg |  756 +--
 .../api/doxygen/object__path_8h__dep__incl.svg     |  514 +-
 .../api/doxygen/optional_8h__dep__incl.svg         |  514 +-
 .../api/doxygen/packed__func_8h__dep__incl.svg     |  356 +-
 .../reference/api/doxygen/reduction_8h_source.html |    2 +-
 .../api/doxygen/reflection_8h__dep__incl.svg       |  690 +-
 docs/reference/api/doxygen/registry_8h.html        |    2 +-
 .../api/doxygen/registry_8h__dep__incl.svg         | 1003 ++-
 .../api/doxygen/relay_2analysis_8h_source.html     |    4 +-
 .../api/doxygen/repr__printer_8h__dep__incl.svg    |  514 +-
 .../runtime_2container_2adt_8h__dep__incl.svg      |  180 +-
 .../runtime_2container_2base_8h__dep__incl.svg     |  736 +--
 .../api/doxygen/runtime_2memory_8h__dep__incl.svg  |  582 +-
 .../api/doxygen/runtime_2module_8h__dep__incl.svg  |  336 +-
 .../api/doxygen/schedule__pass_8h_source.html      |    3 +-
 .../api/doxygen/script_2ir__builder_2base_8h.html  |    2 +-
 .../script_2ir__builder_2base_8h__dep__incl.svg    |   80 +-
 docs/reference/api/doxygen/search/all_1.js         |    2 +-
 docs/reference/api/doxygen/search/all_10.js        |    2 +
 docs/reference/api/doxygen/search/all_11.js        |    8 +-
 docs/reference/api/doxygen/search/all_13.js        |    3 +-
 docs/reference/api/doxygen/search/all_14.js        |   20 +-
 docs/reference/api/doxygen/search/all_15.js        |   25 +-
 docs/reference/api/doxygen/search/all_16.js        |    2 +-
 docs/reference/api/doxygen/search/all_17.js        |    2 +-
 docs/reference/api/doxygen/search/all_2.js         |    5 +-
 docs/reference/api/doxygen/search/all_3.js         |    2 +-
 docs/reference/api/doxygen/search/all_4.js         |    2 +-
 docs/reference/api/doxygen/search/all_6.js         |    8 +-
 docs/reference/api/doxygen/search/all_7.js         |    6 +-
 docs/reference/api/doxygen/search/all_8.js         |    2 +-
 docs/reference/api/doxygen/search/all_a.js         |    8 +-
 docs/reference/api/doxygen/search/all_d.js         |    4 +-
 docs/reference/api/doxygen/search/all_e.js         |   11 +-
 docs/reference/api/doxygen/search/all_f.js         |    2 +-
 docs/reference/api/doxygen/search/classes_0.js     |    1 +
 docs/reference/api/doxygen/search/classes_10.js    |    4 +-
 docs/reference/api/doxygen/search/classes_11.js    |    8 +-
 docs/reference/api/doxygen/search/classes_4.js     |    2 +-
 docs/reference/api/doxygen/search/classes_5.js     |    2 +-
 docs/reference/api/doxygen/search/classes_8.js     |    4 +-
 docs/reference/api/doxygen/search/classes_9.js     |    2 +-
 docs/reference/api/doxygen/search/classes_a.js     |    1 +
 docs/reference/api/doxygen/search/classes_c.js     |    2 +
 docs/reference/api/doxygen/search/classes_d.js     |    2 +
 docs/reference/api/doxygen/search/files_5.js       |    2 +-
 docs/reference/api/doxygen/search/files_7.js       |    2 +-
 docs/reference/api/doxygen/search/functions_10.js  |    2 +-
 docs/reference/api/doxygen/search/functions_13.js  |    6 +-
 docs/reference/api/doxygen/search/functions_14.js  |   11 +-
 docs/reference/api/doxygen/search/functions_15.js  |    2 +-
 docs/reference/api/doxygen/search/functions_16.js  |    2 +-
 docs/reference/api/doxygen/search/functions_5.js   |    4 +-
 docs/reference/api/doxygen/search/functions_9.js   |    2 +-
 docs/reference/api/doxygen/search/functions_d.js   |    4 +-
 docs/reference/api/doxygen/search/functions_e.js   |    2 +-
 docs/reference/api/doxygen/search/namespaces_1.js  |    3 +-
 docs/reference/api/doxygen/search/variables_0.js   |    2 +-
 docs/reference/api/doxygen/search/variables_1.js   |    4 +-
 docs/reference/api/doxygen/search/variables_10.js  |    3 +-
 docs/reference/api/doxygen/search/variables_11.js  |    2 +-
 docs/reference/api/doxygen/search/variables_2.js   |    2 +-
 docs/reference/api/doxygen/search/variables_3.js   |    2 +-
 docs/reference/api/doxygen/search/variables_5.js   |    2 +-
 docs/reference/api/doxygen/search/variables_6.js   |    2 +-
 docs/reference/api/doxygen/search/variables_7.js   |    2 +-
 docs/reference/api/doxygen/search/variables_c.js   |    2 +-
 docs/reference/api/doxygen/search/variables_d.js   |    2 +-
 docs/reference/api/doxygen/search/variables_f.js   |    2 +-
 .../api/doxygen/serializer_8h__dep__incl.svg       |  544 +-
 .../api/doxygen/shape__tuple_8h__dep__incl.svg     |  420 +-
 .../api/doxygen/source__map_8h__dep__incl.svg      |  256 +-
 docs/reference/api/doxygen/stmt_8h.html            |    2 +-
 docs/reference/api/doxygen/stmt_8h__dep__incl.svg  | 1198 ++--
 docs/reference/api/doxygen/stmt_8h_source.html     |    3 +-
 .../api/doxygen/strided__slice_8h_source.html      |    2 +-
 .../reference/api/doxygen/string_8h__dep__incl.svg |  474 +-
 .../doxygen/structural__equal_8h__dep__incl.svg    |  506 +-
 .../api/doxygen/structural__hash_8h__dep__incl.svg |  506 +-
 .../api/doxygen/tir_2expr_8h__dep__incl.svg        |   30 +-
 .../api/doxygen/tir_2function_8h_source.html       |    3 +-
 .../api/doxygen/{ir_8h.html => tir_2ir_8h.html}    |   27 +-
 docs/reference/api/doxygen/tir_2ir_8h__incl.svg    | 1533 +++++
 docs/reference/api/doxygen/tir_2ir_8h_source.html  |   83 +
 docs/reference/api/doxygen/tir_2op_8h.html         |   13 +-
 .../api/doxygen/tir_2op_8h__dep__incl.svg          | 1580 +++--
 docs/reference/api/doxygen/tir_2op_8h_source.html  |   30 +-
 .../api/doxygen/tir_2transform_8h_source.html      |    5 +-
 .../api/doxygen/tir_2usmp_2analysis_8h_source.html |    5 +-
 docs/reference/api/doxygen/topi_2nn_8h_source.html |    4 +-
 .../api/doxygen/topi_2transform_8h_source.html     |    4 +-
 docs/reference/api/doxygen/type__relation_8h.html  |    2 +-
 .../api/doxygen/type__relation_8h__dep__incl.svg   |  946 ++-
 docs/reference/api/doxygen/var_8h__dep__incl.svg   |   54 +-
 docs/reference/api/python/auto_scheduler.html      |    4 +-
 .../api/typedoc/classes/bytestreamreader.html      |   12 +-
 .../api/typedoc/classes/cachedcallstack.html       |   34 +-
 docs/reference/api/typedoc/classes/dldatatype.html |   12 +-
 docs/reference/api/typedoc/classes/dldevice.html   |   10 +-
 .../reference/api/typedoc/classes/environment.html |   12 +-
 docs/reference/api/typedoc/classes/ffilibrary.html |   20 +-
 .../api/typedoc/classes/graphexecutor.html         |   16 +-
 docs/reference/api/typedoc/classes/instance.html   |   40 +-
 docs/reference/api/typedoc/classes/memory.html     |   34 +-
 docs/reference/api/typedoc/classes/module.html     |   10 +-
 docs/reference/api/typedoc/classes/ndarray.html    |   22 +-
 .../api/typedoc/classes/packedfunccell.html        |    6 +-
 docs/reference/api/typedoc/classes/rpcserver.html  |   14 +-
 docs/reference/api/typedoc/classes/scalar.html     |    6 +-
 .../api/typedoc/classes/webgpucontext.html         |   12 +-
 docs/reference/api/typedoc/enums/argtypecode.html  |   30 +-
 .../api/typedoc/enums/aynccallbackcode.html        |    4 +-
 .../api/typedoc/enums/dldatatypecode.html          |    8 +-
 .../api/typedoc/enums/rpcserverstate.html          |   12 +-
 docs/reference/api/typedoc/enums/sizeof.html       |   18 +-
 docs/reference/api/typedoc/index.html              |  112 +-
 .../api/typedoc/interfaces/disposable.html         |    2 +-
 .../api/typedoc/interfaces/functioninfo.html       |    6 +-
 .../api/typedoc/interfaces/libraryprovider.html    |    4 +-
 docs/searchindex.js                                |    2 +-
 .../vta/tutorials/autotvm/sg_execution_times.html  |    4 +-
 .../tutorials/frontend/deploy_classification.html  |    2 +-
 .../vta/tutorials/frontend/deploy_detection.html   |    2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    6 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    6 +-
 docs/topic/vta/tutorials/sg_execution_times.html   |    6 +-
 docs/tutorial/auto_scheduler_matmul_x86.html       |    6 +-
 docs/tutorial/autotvm_matmul_x86.html              |   20 +-
 docs/tutorial/autotvm_relay_x86.html               |  258 +-
 docs/tutorial/cross_compilation_and_rpc.html       |    2 +-
 docs/tutorial/intro_topi.html                      |    2 +-
 docs/tutorial/sg_execution_times.html              |   36 +-
 docs/tutorial/tensor_expr_get_started.html         |   41 +-
 475 files changed, 28593 insertions(+), 22239 deletions(-)

diff --git a/docs/_sources/how_to/compile_models/from_darknet.rst.txt b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
index 16a886cd4..bcc50b3fd 100644
--- a/docs/_sources/how_to/compile_models/from_darknet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
@@ -317,7 +317,7 @@ The process is no different from other examples.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  3.774 seconds)
+   **Total running time of the script:** ( 1 minutes  6.919 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_darknet.py:
diff --git a/docs/_sources/how_to/compile_models/from_keras.rst.txt b/docs/_sources/how_to/compile_models/from_keras.rst.txt
index dcae0dcf6..30629a6d7 100644
--- a/docs/_sources/how_to/compile_models/from_keras.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_keras.rst.txt
@@ -228,7 +228,7 @@ Look up prediction top 1 index in 1000 class synset.
  .. code-block:: none
 
     Relay top-1 id: 285, class name: Egyptian cat
-
    1/1 [==============================] - ETA: 0s
    1/1 [==============================] - 1s 987ms/step
+
    1/1 [==============================] - ETA: 0s
    1/1 [==============================] - 1s 1s/step
     Keras top-1 id: 285, class name: Egyptian cat
 
 
diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index 2b94e4a1d..55865e465 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -115,7 +115,7 @@ In this section, we download a pretrained imagenet model and classify an image.
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip4b3d6bf1-8db2-4a06-a57c-2b0e765bf118 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip042cf72f-1210-4030-b3cf-e4e9538b1927 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
     x (1, 3, 224, 224)
 
 
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index e881cfa64..2003c6edd 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -116,7 +116,7 @@ Load a pretrained OneFlow model and save model
  .. code-block:: none
 
     Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     19%|#9        | 7.99M/41.5M [00:00<00:00, 61.9MB/s]
     35%|###4      | 14.3M/41.5M [00:00<00:00, 58.1MB/s]
     48%|####7     | 19.9M/41.5M [00:00<00:00, 53.4MB/s]
     77%|#######7  | 32.0M/41.5M [00:00<00:00, 56.8MB/s]
     93%|#########3| 38.6M/41.5M [00:00<00:00, 60.2MB/s]
    100%|##########| 41.5M/41.5M [00:00<00:00, 57.5MB/s]
+
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     21%|##        | 8.60M/41.5M [00:00<00:00, 90.2MB/s]
     41%|####1     | 17.2M/41.5M [00:00<00:00, 68.1MB/s]
     58%|#####7    | 24.0M/41.5M [00:00<00:00, 49.5MB/s]
     77%|#######7  | 32.0M/41.5M [00:00<00:00, 48.5MB/s]
     89%|########9 | 36.9M/41.5M [00:00<00:00, 45.9MB/s]
    100%|#########9| 41.5M/41.5M [00:00<00:00, 43.3MB/s]
    100%|##########| 41.5M/41.5M [00:00<00:00, 48.6MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index a46332a4b..dbb42070f 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -94,7 +94,7 @@ Load a pretrained PyTorch model
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     28%|##7       | 12.4M/44.7M [00:00<00:00, 130MB/s]
     76%|#######6  | 34.1M/44.7M [00:00<00:00, 187MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 188MB/s]
+
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
      7%|6         | 3.07M/44.7M [00:00<00:01, 32.1MB/s]
     14%|#3        | 6.22M/44.7M [00:00<00:01, 32.6MB/s]
     66%|######5   | 29.3M/44.7M [00:00<00:00, 128MB/s] 
    100%|##########| 44.7M/44.7M [00:00<00:00, 132MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 438d5448f..486fd750d 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -423,7 +423,7 @@ Run the corresponding model on tensorflow
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  4.983 seconds)
+   **Total running time of the script:** ( 1 minutes  10.418 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index e23411afa..c8dccb8e8 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**05:09.598** total execution time for **how_to_compile_models** files:
+**05:24.099** total execution time for **how_to_compile_models** files:
 
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:04.983 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:10.418 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:03.774 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:06.919 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:39.974 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:41.947 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:28.225 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:29.187 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:26.516 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:25.999 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:25.020 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:25.898 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:22.422 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:23.396 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:19.619 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:20.679 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:16.735 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:17.132 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.331 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.522 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index 9c74ef12e..2ac1706ef 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -441,7 +441,7 @@ Execute on TVM
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      16.0189      15.9961      16.1743      15.8860       0.0982   
+      16.5020      16.4444      17.0750      16.3679       0.1983   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index 9e3dae6ac..36c0ba588 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -123,7 +123,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
      0%|          | 0.00/170M [00:00<?, ?B/s]
      3%|2         | 4.52M/170M [00:00<00:03, 47.4MB/s]
      9%|8         | 15.1M/170M [00:00<00:01, 84.6MB/s]
     21%|##1       | 35.9M/170M [00:00<00:00, 145MB/s] 
     36%|###5      | 60.9M/170M [00:00<00:00, 191MB/s]
     52%|#####1    | 87.6M/170M [00:00<00:00, 223MB/s]
     70%|######9   | 119M/170M [00:00<00:00, 258MB/s] 
     85%|########5 | 144M/170M [00:00<00:00, 261MB/s]
    100%|#########9| 169M/170M [00:00<00:00, 256MB/s]
    100%|##########| 170M/170M [00:00<00:00, 220MB/s]
+
      0%|          | 0.00/170M [00:00<?, ?B/s]
      2%|2         | 3.75M/170M [00:00<00:04, 39.2MB/s]
      6%|5         | 10.0M/170M [00:00<00:03, 54.6MB/s]
      9%|8         | 15.2M/170M [00:00<00:03, 47.4MB/s]
     12%|#2        | 20.4M/170M [00:00<00:03, 50.1MB/s]
     16%|#5        | 26.5M/170M [00:00<00:02, 54.6MB/s]
     19%|#8        | 31.8M/170M [00:00<00:02, 51.1MB/s]
     22%|##1       | 36.8M/170M [00:00<00:02, 50.7MB/s]
     25%|##4       | 41.6M/170M [00:00<00:02, 50.3MB/s]
     27%|##7       | 46.5M/170M [00:00<00:02, 49.0MB/s]
     31%|###       | 52.4M/170M [00:01<00:02, 52.0MB/s]
     35%|###4      | 58.9M/170M [00:01<00:02, 56.8MB/s]
     38%|###7      | 64.4M/170M [00:01<00:01, 56.5MB/s]
     41%|####1     | 69.8M/170M [00:01<00:01, 56.6MB/s]
     44%|####4     | 75.2M/170M [00:01<00:01, 50.6MB/s]
     48%|####7     | 81.1M/170M [00:01<00:01, 53.7MB/s]
     51%|#####     | 86.4M/170M [00:01<00:01, 46.2MB/s]
     54%|#####3    | 91.0M/170M [00:01<00:01, 46.4MB/s]
     57%|#####6    | 96.6M/170M [00:01<00:01, 49.3MB/s]
     60%|######    | 102M/170M [00:02<00:01, 52.5MB/s] 
     63%|######3   | 108M/170M [00:02<00:01, 49.9MB/s]
     66%|######6   | 112M/170M [00:02<00:01, 47.0MB/s]
     69%|######8   | 117M/170M [00:02<00:01, 44.9MB/s]
     72%|#######1  | 122M/170M [00:02<00:01, 44.8MB/s]
     75%|#######5  | 128M/170M [00:02<00:00, 50.2MB/s]
     78%|#######8  | 133M/170M [00:02<00:00, 52.3MB/s]
     82%|########2 | 139M/170M [00:02<00:00, 55.6MB/s]
     85%|########5 | 145M/170M [00:03<00:00, 48.8MB/s]
     89%|########8 | 151M/170M [00:03<00:00, 52.2MB/s]
     92%|#########1| 156M/170M [00:03<00:00, 53.2MB/s]
     95%|#########5| 161M/170M [00:03<00:00, 43.9MB/s]
     98%|#########8| 167M/170M [00:03<00:00, 47.0MB/s]
    100%|##########| 170M/170M [00:03<00:00, 50.0MB/s]
     /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
       for i in range(dim)
     /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -295,7 +295,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  2.729 seconds)
+   **Total running time of the script:** ( 3 minutes  15.342 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index 8e0f078ed..f6bc6649f 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -232,7 +232,7 @@ training. Other models require a full post training calibration.
  .. code-block:: none
 
     Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     26%|##5       | 3.48M/13.6M [00:00<00:00, 36.2MB/s]
     63%|######3   | 8.56M/13.6M [00:00<00:00, 46.2MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 52.9MB/s]
+
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 190MB/s]
 
 
 
@@ -412,7 +412,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      90.3306      90.2257      92.9293      90.0316       0.3912   
+      90.5458      90.3687      96.7163      90.2102       0.8075   
                
 
 
@@ -461,7 +461,7 @@ TODO
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  11.458 seconds)
+   **Total running time of the script:** ( 1 minutes  13.587 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index ab3c68d0a..39d1fbceb 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -439,7 +439,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      119.5119     119.5018     120.4973     118.7849      0.3286   
+      121.1934     121.0105     129.8010     120.2194      1.0326   
                
 
 
@@ -476,7 +476,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  53.753 seconds)
+   **Total running time of the script:** ( 1 minutes  58.345 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index 81c22e148..1f4b172c6 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -255,7 +255,7 @@ We create a Relay VM to build and execute the model.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  28.023 seconds)
+   **Total running time of the script:** ( 1 minutes  27.791 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index 6b5a1e85f..be60b3e59 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -158,7 +158,7 @@ Convert and compile model for CPU.
             data: None
       input_sym_arg_type = in_param.infer_type()[0]
     Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|3         | 5272/132723 [00:00<00:02, 52712.89KB/s]
      9%|9         | 12402/132723 [00:00<00:01, 63641.72KB/s]
     15%|#4        | 19831/132723 [00:00<00:01, 68500.67KB/s]
     20%|##        | 26921/132723 [00:00<00:01, 69444.66KB/s]
     26%|##5       | 34116/132723 [00:00<00:01, 70346.61KB/s]
     31%|###1      | 41199/132723 [00:00<00:01, 70508.35KB/s]
     37%|###6      | 48639/132723 [00:00<00:01, 71777.88KB/s]
     42%|####2     | 55997/132723 [00:00<00:01, 72349.87KB/s]
     48%|####7     | 63464/132723 [00:00<00:00, 73068.72KB/s]
     53%|#####3    | 70933/132723 [00:01<00:00, 73566.94KB/s]
     59%|#####9    | 78320/132723 [00:01<00:00, 73657.29KB/s]
     65%|######4   | 85819/132723 [00:01<00:00, 74060.66KB/s]
     70%|#######   | 93325/132723 [00:01<00:00, 74358.88KB/s]
     76%|#######5  | 100842/132723 [00:01<00:00, 74600.92KB/s]
     82%|########1 | 108303/132723 [00:01<00:00, 74461.83KB/s]
     87%|########7 | 115776/132723 [00:01<00:00, 74540.91KB/s]
     93%|#########2| 123292/132723 [00:01<00:00, 74725.30KB/s]
     99%|#########8| 130824/132723 [00:01<00:00, 74898.16KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 72673.01KB/s]
+
      0%|          | 0/132723 [00:00<?, ?KB/s]
      5%|4         | 6366/132723 [00:00<00:01, 63648.68KB/s]
     11%|#1        | 14774/132723 [00:00<00:01, 75663.51KB/s]
     17%|#7        | 23165/132723 [00:00<00:01, 79425.04KB/s]
     24%|##3       | 31693/132723 [00:00<00:01, 81734.01KB/s]
     30%|###       | 40122/132723 [00:00<00:01, 82649.01KB/s]
     37%|###6      | 48586/132723 [00:00<00:01, 83318.62KB/s]
     43%|####3     | 57153/132723 [00:00<00:00, 84085.24KB/s]
     50%|####9     | 65727/132723 [00:00<00:00, 84609.19KB/s]
     56%|#####5    | 74294/132723 [00:00<00:00, 84937.90KB/s]
     62%|######2   | 82788/132723 [00:01<00:00, 84767.69KB/s]
     69%|######8   | 91312/132723 [00:01<00:00, 84907.79KB/s]
     75%|#######5  | 99869/132723 [00:01<00:00, 85105.78KB/s]
     82%|########1 | 108380/132723 [00:01<00:00, 85069.70KB/s]
     88%|########8 | 116937/132723 [00:01<00:00, 85218.20KB/s]
     95%|#########4| 125459/132723 [00:01<00:00, 84880.54KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 83491.23KB/s]
 
 
 
@@ -241,7 +241,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  41.715 seconds)
+   **Total running time of the script:** ( 2 minutes  46.808 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index b6815fa63..80ff69d47 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,24 +5,24 @@
 
 Computation times
 =================
-**11:35.081** total execution time for **how_to_deploy_models** files:
+**12:02.036** total execution time for **how_to_deploy_models** files:
 
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:02.729 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:15.342 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 02:41.715 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 02:46.808 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 01:53.753 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 01:58.345 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:28.023 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:27.791 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:11.458 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:13.587 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:31.836 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:32.485 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_nano.py` (``deploy_model_on_nano.py``)                       | 00:22.961 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_nano.py` (``deploy_model_on_nano.py``)                       | 00:24.158 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:22.598 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:23.513 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)                                     | 00:00.007 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index a26f5fa69..c2305a14b 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -476,7 +476,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip09334d33-df49-4c93-aaa6-395497eee699 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipb4573aff-917b-43a6-8528-13d5b714b02e from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 
 
 
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index 8fe2e5e61..b90f1c2b7 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:42.138** total execution time for **how_to_extend_tvm** files:
+**00:42.814** total execution time for **how_to_extend_tvm** files:
 
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:38.907 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:39.495 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.235 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.320 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:00.986 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:00.991 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.009 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index 1daf48405..9b852cbd6 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -216,10 +216,10 @@ profile the execution time of each passes.
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6768us [6768us] (45.72%; 45.72%)
-    FoldScaleAxis: 8036us [7us] (54.28%; 54.28%)
-            FoldConstant: 8030us [1670us] (54.24%; 99.92%)
-                    InferType: 6360us [6360us] (42.96%; 79.20%)
+    InferType: 7457us [7457us] (47.24%; 47.24%)
+    FoldScaleAxis: 8330us [7us] (52.76%; 52.76%)
+            FoldConstant: 8323us [1752us] (52.72%; 99.92%)
+                    InferType: 6572us [6572us] (41.63%; 78.96%)
 
 
 
@@ -258,10 +258,10 @@ Refer to following sections and :py:func:`tvm.instrument.pass_instrument` for th
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 6472us [6472us] (44.72%; 44.72%)
-    FoldScaleAxis: 8000us [6us] (55.28%; 55.28%)
-            FoldConstant: 7994us [1712us] (55.24%; 99.93%)
-                    InferType: 6281us [6281us] (43.41%; 78.58%)
+    InferType: 6677us [6677us] (44.76%; 44.76%)
+    FoldScaleAxis: 8242us [7us] (55.24%; 55.24%)
+            FoldConstant: 8235us [1721us] (55.20%; 99.92%)
+                    InferType: 6514us [6514us] (43.67%; 79.11%)
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index afbad39ca..896cf697a 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -340,7 +340,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 54.159255 ms
+    Convolution: 54.123106 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index b10ddb241..258ba3deb 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -671,7 +671,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 6.841903 ms
+    conv2d with tensor core: 13.381761 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index 179910c7a..397e3b9d5 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -143,8 +143,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.019053
-    Baseline: 3.484342
+    Numpy running time: 0.019442
+    Baseline: 3.325164
 
 
 
@@ -239,7 +239,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.308756
+    Opt1: 0.336239
 
 
 
@@ -342,7 +342,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.350427
+    Opt2: 0.355516
 
 
 
@@ -438,7 +438,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.123910
+    Opt3: 0.135305
 
 
 
@@ -563,7 +563,7 @@ flattening.
 
  .. code-block:: none
 
-    Opt4: 0.110596
+    Opt4: 0.110355
 
 
 
@@ -685,7 +685,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.111957
+    Opt5: 0.112382
 
 
 
@@ -810,7 +810,7 @@ Furthermore, we can also utilize multi-core processors to do the thread-level pa
 
  .. code-block:: none
 
-    Opt6: 0.147962
+    Opt6: 0.148586
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index da6cedc82..337adc7da 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:35.177** total execution time for **how_to_optimize_operators** files:
+**00:35.477** total execution time for **how_to_optimize_operators** files:
 
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:32.956 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:32.953 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.228 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.386 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:00.993 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.138 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index c44df3494..35ff940c5 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,18 +5,18 @@
 
 Computation times
 =================
-**06:21.425** total execution time for **how_to_tune_with_autoscheduler** files:
+**06:37.339** total execution time for **how_to_tune_with_autoscheduler** files:
 
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 03:30.817 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 03:44.426 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:24.333 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:26.025 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 00:47.837 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 00:48.701 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:20.289 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:19.544 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:09.196 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:09.434 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:08.953 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:09.209 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index 0b9847514..e9f458ac7 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -247,11 +247,11 @@ cooperative fetching, unrolling and operator fusion.
                  compute: Buffer(compute_2: Pointer(float32), float32, [25088], [])}
       buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
       preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
-      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 8;
+      attr [IterVar(blockIdx.x: int32, (nullptr), "ThreadIndex", "blockIdx.x")] "thread_extent" = 28;
       allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
-      allocate(pad_temp.shared: Pointer(shared float32), float32, [504]), storage_scope = shared;
-      allocate(kernel.shared: Pointer(shared float32), float32, [1536]), storage_scope = shared;
-      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224 {
+      allocate(pad_temp.shared: Pointer(shared float32), float32, [72]), storage_scope = shared;
+      allocate(kernel.shared: Pointer(shared float32), float32, [3072]), storage_scope = shared;
+      attr [IterVar(threadIdx.x: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64 {
         conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope="local", align=32)[0] = 0f32
         conv2d_nchw_1[1] = 0f32
         conv2d_nchw_1[2] = 0f32
@@ -268,85 +268,461 @@ cooperative fetching, unrolling and operator fusion.
         conv2d_nchw_1[13] = 0f32
         for (rc.outer.outer: int32, 0, 64) {
           for (ry.outer.outer: int32, 0, 3) {
-            let cse_var_4: int32 = (rc.outer.outer*392)
-            let cse_var_3: int32 = (ry.outer.outer*7)
             let cse_var_2: int32 = (rc.outer.outer*72)
             let cse_var_1: int32 = (ry.outer.outer*3)
              {
-              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224;
-              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [504], [], scope="shared")[threadIdx.x_1] = @tir.if_then_else(((((1 <= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) && ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod(threadIdx.x_1, 9))) && (floormod(threadIdx.x_1, 9) < 8)), data[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224;
-              pad_temp.shared_1[(threadIdx.x_1 + 224)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 8), 9))) && (floormod((threadIdx.x_1 + 8), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 224), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-              attr [IterVar(threadIdx.x_1, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224;
-              if @tir.likely((threadIdx.x_1 < 56), dtype=bool) {
-                pad_temp.shared_1[(threadIdx.x_1 + 448)] = @tir.if_then_else(((((1 <= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) && ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) < 8)) && (1 <= floormod((threadIdx.x_1 + 7), 9))) && (floormod((threadIdx.x_1 + 7), 9) < 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 448), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-              }
-              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224;
-              kernel.shared_1: Buffer(kernel.shared, float32, [1536], [], scope="shared")[threadIdx.x_2] = kernel[((((((blockIdx.x*294912) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224;
-              kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[((((((blockIdx.x*294912) + (floordiv((threadIdx.x_2 + 224), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224;
-              kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((blockIdx.x*294912) + (floordiv((threadIdx.x_2 + 448), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224;
-              kernel.shared_1[(threadIdx.x_2 + 672)] = kernel[(((((((blockIdx.x*294912) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 129024)]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224;
-              kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((blockIdx.x*294912) + (floordiv((threadIdx.x_2 + 896), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224;
-              kernel.shared_1[(threadIdx.x_2 + 1120)] = kernel[((((((blockIdx.x*294912) + (floordiv((threadIdx.x_2 + 1120), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 224;
-              if @tir.likely((threadIdx.x_2 < 192), dtype=bool) {
-                kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((((blockIdx.x*294912) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
-              }
-              for (rc.outer.inner: int32, 0, 8) {
-                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9))]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-                conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-                conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-                conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-                conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-                conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-                conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-                conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-                conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-                conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-                conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-                conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-                conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-                conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-                conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-                conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-                conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-                conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-                conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-                conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-                conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-                conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-                conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-                conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-                conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-                conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-                conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-                conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-                conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
+              attr [IterVar(threadIdx.x_1: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64 {
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1: Buffer(pad_temp.shared, float32, [72], [], scope="shared")[(threadIdx.x_1*4)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod((threadIdx.x_1*4), 9))) && (floormod((threadIdx.x_1*4), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv((threadIdx.x_1*4), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod((threadIdx.x_1*4), 9)) - 8)], 0f3 [...]
+                }
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 1), 9))) && (floormod(((threadIdx.x_1*4) + 1), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 1), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0f32, dtype=float32)
+                }
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 2), 9))) && (floormod(((threadIdx.x_1*4) + 2), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 2), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0f32, dtype=float32)
+                }
+                if @tir.likely((threadIdx.x_1 < 18), dtype=bool) {
+                  pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((1 <= (ry.outer.outer + floormod(blockIdx.x, 7))) && ((ry.outer.outer + floormod(blockIdx.x, 7)) < 8)) && (1 <= floormod(((threadIdx.x_1*4) + 3), 9))) && (floormod(((threadIdx.x_1*4) + 3), 9) < 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 3), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0f32, dtype=float32)
+                }
               }
+              attr [IterVar(threadIdx.x_2: int32, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1: Buffer(kernel.shared, float32, [3072], [], scope="shared")[threadIdx.x_2] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 64)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 64), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 128)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 128), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 192)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 36864)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 256)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 256), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 320)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 320), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 384)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 73728)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 448), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 512)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 512), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 576)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 110592)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 640)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 640), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 704)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 704), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 768)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 147456)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 832)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 832), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 896), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 960)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 184320)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1024), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1088), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1152)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 221184)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1216)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1216), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1280)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1280), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1408)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1408), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1472)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1472), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1536)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 294912)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1600)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1600), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1664)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1664), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1728)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 331776)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1792)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1792), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1856)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1856), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1920)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 368640)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 1984)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1984), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2048)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2048), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2112)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 405504)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2176)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2176), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2240)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2240), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2304)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 442368)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2368)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2368), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2432)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2432), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2496)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 479232)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2560)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2560), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2624)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2624), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2688)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 516096)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2752)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2752), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2816)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2816), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2880)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 552960)]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 2944)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2944), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+              attr [IterVar(threadIdx.x_2, (nullptr), "ThreadIndex", "threadIdx.x")] "thread_extent" = 64;
+              kernel.shared_1[(threadIdx.x_2 + 3008)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 3008), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*48)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+              conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+              conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 47)]))
             }
           }
         }
         for (i1.inner: int32, 0, 2) {
           for (i3.inner: int32, 0, 7) {
-            compute[(((((blockIdx.x*3136) + (floordiv(threadIdx.x, 7)*98)) + (i1.inner*49)) + (floormod(threadIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((blockIdx.x*64) + (floordiv(threadIdx.x, 7)*2)) + i1.inner)]), 0f32)
+            compute[(((((floordiv(blockIdx.x, 7)*6272) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((floordiv(blockIdx.x, 7)*128) + (threadIdx.x*2)) + i1.inner)]), 0f32)
           }
         }
       }
@@ -402,7 +778,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.355 ms
+    Execution time of this operator: 0.356 ms
 
 
 
@@ -452,31 +828,31 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
     conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
     conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
-    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=32)
+    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
     conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
     conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
     conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
+    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
     conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
-    conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=7)
-    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
+    conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
+    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
     conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
     conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=1)
-    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=8)
+    conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
     conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
     conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
-    conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=3)
-    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
+    conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
+    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
     s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2 [...]
     compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
     compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
     compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
     compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
-    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=32)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
     compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
     compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
-    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
+    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
     compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
     compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
     compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
@@ -499,14 +875,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=224)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
     s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
     s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=224)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
     s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
-    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 64)
+    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 512)
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
 
     CUDA source code:
@@ -524,10 +900,10 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       #define int64_t long long
       #define uint64_t unsigned long long
     #endif
-    extern "C" __global__ void __launch_bounds__(224) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+    extern "C" __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
       float conv2d_nchw[14];
-      __shared__ float pad_temp_shared[504];
-      __shared__ float kernel_shared[1536];
+      __shared__ float pad_temp_shared[72];
+      __shared__ float kernel_shared[3072];
       conv2d_nchw[0] = 0.000000e+00f;
       conv2d_nchw[1] = 0.000000e+00f;
       conv2d_nchw[2] = 0.000000e+00f;
@@ -545,70 +921,408 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       for (int rc_outer_outer = 0; rc_outer_outer < 64; ++rc_outer_outer) {
         for (int ry_outer_outer = 0; ry_outer_outer < 3; ++ry_outer_outer) {
           __syncthreads();
-          pad_temp_shared[((int)threadIdx.x)] = (((((1 <= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) && ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= (((int)threadIdx.x) % 9))) && ((((int)threadIdx.x) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) - 8)] : 0.000000e+00f);
-          pad_temp_shared[(((int)threadIdx.x) + 224)] = (((((1 <= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) && (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 8) % 9))) && (((((int)threadIdx.x) + 8) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 224) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-          if (((int)threadIdx.x) < 56) {
-            pad_temp_shared[(((int)threadIdx.x) + 448)] = (((((1 <= (((((int)threadIdx.x) + 7) / 9) + ry_outer_outer)) && ((((((int)threadIdx.x) + 7) / 9) + ry_outer_outer) < 8)) && (1 <= ((((int)threadIdx.x) + 7) % 9))) && (((((int)threadIdx.x) + 7) % 9) < 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 448) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) * 4) % 9))) && (((((int)threadIdx.x) * 4) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
           }
-          kernel_shared[((int)threadIdx.x)] = kernel[((((((((int)blockIdx.x) * 294912) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 224)] = kernel[((((((((int)blockIdx.x) * 294912) + (((((int)threadIdx.x) + 224) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 448)] = kernel[((((((((int)blockIdx.x) * 294912) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 672)] = kernel[(((((((((int)blockIdx.x) * 294912) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 129024)];
-          kernel_shared[(((int)threadIdx.x) + 896)] = kernel[((((((((int)blockIdx.x) * 294912) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1120)] = kernel[((((((((int)blockIdx.x) * 294912) + (((((int)threadIdx.x) + 1120) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          if (((int)threadIdx.x) < 192) {
-            kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[(((((((((int)blockIdx.x) * 294912) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 1) % 9))) && ((((((int)threadIdx.x) * 4) + 1) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
           }
-          __syncthreads();
-          for (int rc_outer_inner = 0; rc_outer_inner < 8; ++rc_outer_inner) {
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9))] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9))] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-            conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-            conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-            conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-            conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-            conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-            conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 2) % 9))) && ((((((int)threadIdx.x) * 4) + 2) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
           }
+          if (((int)threadIdx.x) < 18) {
+            pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 3) % 9))) && ((((((int)threadIdx.x) * 4) + 3) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
+          }
+          kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
+          kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
+          kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
+          kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
+          kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
+          kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
+          kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
+          kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
+          kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
+          kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
+          kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
+          kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
+          kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
+          kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
+          kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
+          kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+          kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+          __syncthreads();
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
         }
       }
       for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
         for (int i3_inner = 0; i3_inner < 7; ++i3_inner) {
-          compute[(((((((int)blockIdx.x) * 3136) + ((((int)threadIdx.x) / 7) * 98)) + (i1_inner * 49)) + ((((int)threadIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[(((((int)blockIdx.x) * 64) + ((((int)threadIdx.x) / 7) * 2)) + i1_inner)]), 0.000000e+00f);
+          compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
         }
       }
     }
@@ -671,7 +1385,7 @@ In the example below we resume the status and do more 5 trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  30.817 seconds)
+   **Total running time of the script:** ( 3 minutes  44.426 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index 0e2f5eeb7..29355b2d0 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -647,7 +647,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-       9.7631       9.7916       9.8254       9.6723       0.0657   
+       9.7732       9.7991       9.8094       9.7111       0.0441   
                
 
 
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index 1ff0abde6..e0acd9acd 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -666,7 +666,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      766.9692     765.5452     770.3598     765.0026      2.4077   
+      772.5422     772.5742     772.6085     772.4439      0.0709   
                
 
 
@@ -694,7 +694,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  24.333 seconds)
+   **Total running time of the script:** ( 1 minutes  26.025 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index 43ee6f820..18ceb6b42 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -397,29 +397,30 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
                  placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
                  compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
       buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-      preflattened_buffer_map = {compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_6: placeholder_15: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_9: placeholder_16: Buffer(placeholder_14, float32, [128, 512], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), placeholder_5: placeholder_18: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_19: Buffer(placeholder_12, int32, [4916], [])} {
-      for (i0.outer.i1.outer.fused: int32, 0, 32) "parallel" {
-        allocate(compute_4: Pointer(global float32), float32, [2048]), storage_scope = global {
-          for (i.outer.inner: int32, 0, 64) {
-            for (i.inner.init: int32, 0, 2) {
-              for (j.init: int32, 0, 16) {
-                compute_5: Buffer(compute_4, float32, [2048], [])[(((i.outer.inner*32) + (i.inner.init*16)) + j.init)] = 0f32
+      preflattened_buffer_map = {placeholder_6: placeholder_15: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_17: Buffer(placeholder_12, int32, [4916], []), placeholder_8: placeholder_18: Buffer(placeholder_13, int32, [33], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], [])} {
+      for (i0.outer.i1.outer.fused: int32, 0, 16) "parallel" {
+        allocate(compute_4: Pointer(global float32), float32, [4096]), storage_scope = global {
+          for (i.outer.inner: int32, 0, 8) {
+            for (nb_j.inner: int32, 0, 2) {
+              for (i.inner.init: int32, 0, 16) {
+                for (j.init: int32, 0, 16) {
+                  compute_5: Buffer(compute_4, float32, [4096], [])[((((i.outer.inner*512) + (i.inner.init*32)) + (nb_j.inner*16)) + j.init)] = 0f32
+                }
               }
-            }
-            for (elem_idx: int32, 0, (placeholder_3[(i0.outer.i1.outer.fused + 1)] - placeholder_3[i0.outer.i1.outer.fused])) {
-              for (i.inner: int32, 0, 2) {
-                for (j: int32, 0, 16) {
-                  if @tir.likely((elem_idx < (placeholder_3[(i0.outer.i1.outer.fused + 1)] - placeholder_3[i0.outer.i1.outer.fused])), dtype=bool) {
-                    let cse_var_1: int32 = (((i.outer.inner*32) + (i.inner*16)) + j)
-                    compute_5[cse_var_1] = (compute_5[cse_var_1] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + j)]*max(placeholder[(((i.outer.inner*512) + (i.inner*256)) + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+              for (elem_idx: int32, 0, let cse_var_1: int32 = ((i0.outer.i1.outer.fused*2) + nb_j.inner) in (placeholder_3[(cse_var_1 + 1)] - placeholder_3[cse_var_1])) {
+                for (i.inner: int32, 0, 16) {
+                  for (j: int32, 0, 16) {
+                    let cse_var_3: int32 = ((i0.outer.i1.outer.fused*2) + nb_j.inner)
+                    let cse_var_2: int32 = ((((i.outer.inner*512) + (i.inner*32)) + (nb_j.inner*16)) + j)
+                    compute_5[cse_var_2] = (compute_5[cse_var_2] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + j)]*max(placeholder[(((i.outer.inner*4096) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
                   }
                 }
               }
             }
           }
           for (i0.inner: int32, 0, 128) {
-            let cse_var_2: int32 = ((i0.inner*512) + (i0.outer.i1.outer.fused*16))
-            compute[ramp(cse_var_2, 1, 16)] = max((compute_5[ramp((i0.inner*16), 1, 16)] + placeholder_4[ramp(cse_var_2, 1, 16)]), broadcast(0f32, 16))
+            let cse_var_4: int32 = ((i0.inner*512) + (i0.outer.i1.outer.fused*32))
+            compute[ramp(cse_var_4, 1, 32)] = max((compute_5[ramp((i0.inner*32), 1, 32)] + placeholder_4[ramp(cse_var_4, 1, 32)]), broadcast(0f32, 32))
           }
         }
       }
@@ -475,7 +476,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 2.156 ms
+    Execution time of this operator: 1.503 ms
 
 
 
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index 8121dce2b..54b060d0f 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:46.109** total execution time for **how_to_tune_with_autotvm** files:
+**00:46.556** total execution time for **how_to_tune_with_autotvm** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:46.073 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:46.518 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.021 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.022 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)             | 00:00.006 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index 26afe102d..2fbbe32f7 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -1156,8 +1156,8 @@ for this template
     TimeoutError
 
             [('tile_f', [-1, 2, 1, 64]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4909501
-    No: 9   GFLOPS: 187.30/187.30   result: MeasureResult(costs=(0.0012359777555555557,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.039618492126465, timestamp=1662746351.936573)        [('tile_f', [-1, 1, 4, 8]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,5072689
-    No: 10  GFLOPS: 0.00/187.30     result: Traceback (most recent call last):
+    No: 9   GFLOPS: 80.79/80.79     result: MeasureResult(costs=(0.002865295857142857,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7858080863952637, timestamp=1662756943.1289473)       [('tile_f', [-1, 1, 4, 8]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,5072689
+    No: 10  GFLOPS: 0.00/80.79      result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1280,8 +1280,8 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 4, 8]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 64, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,5092711
-    No: 11  GFLOPS: 260.82/260.82   result: MeasureResult(costs=(0.000887597729281768,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.792834997177124, timestamp=1662746352.852904) [('tile_f', [-1, 8, 2, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 2, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4264713
-    No: 12  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+    No: 11  GFLOPS: 258.82/258.82   result: MeasureResult(costs=(0.0008944500614525141,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8252346515655518, timestamp=1662756944.1135225)      [('tile_f', [-1, 8, 2, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 2, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4264713
+    No: 12  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1404,7 +1404,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 128, 1, 2]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 256]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,183542
-    No: 13  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+    No: 13  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1527,7 +1527,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 8, 8]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 64]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2482196
-    No: 14  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+    No: 14  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1650,9 +1650,9 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10306226
-    No: 15  GFLOPS: 5.36/260.82     result: MeasureResult(costs=(0.04319207975,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8189799785614014, timestamp=1662746357.4339721)      [('tile_f', [-1, 2, 2, 8]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 8]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5330964
-    No: 16  GFLOPS: 3.36/260.82     result: MeasureResult(costs=(0.0689593425,), error_no=MeasureErrorNo.NO_ERROR, all_cost=4.5808632373809814, timestamp=1662746358.669979)        [('tile_f', [-1, 8, 4, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2140058
-    No: 17  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+    No: 15  GFLOPS: 5.29/258.82     result: MeasureResult(costs=(0.043732662750000005,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8977558612823486, timestamp=1662756948.8730106)       [('tile_f', [-1, 2, 2, 8]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 8]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5330964
+    No: 16  GFLOPS: 3.35/258.82     result: MeasureResult(costs=(0.06900273625,), error_no=MeasureErrorNo.NO_ERROR, all_cost=4.738688230514526, timestamp=1662756950.1517107)       [('tile_f', [-1, 8, 4, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2140058
+    No: 17  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
         res = future.result()
       File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
@@ -1670,8 +1670,8 @@ for this template
     TimeoutError
 
             [('tile_f', [-1, 2, 2, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 16]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10195251
-    No: 18  GFLOPS: 27.03/260.82    result: MeasureResult(costs=(0.0085650848125,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3159880638122559, timestamp=1662746369.7579346)    [('tile_f', [-1, 4, 8, 4]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6068603
-    No: 19  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+    No: 18  GFLOPS: 26.30/258.82    result: MeasureResult(costs=(0.008800986583333333,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.159686803817749, timestamp=1662756961.0743973)        [('tile_f', [-1, 4, 8, 4]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6068603
+    No: 19  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1794,7 +1794,7 @@ for this template
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 871, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
     tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 16, 4, 8]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6956993
-    No: 20  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+    No: 20  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 588, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 540, in _build_func_common
@@ -1973,7 +1973,7 @@ and measure running time.
     Best config:
     [('tile_f', [-1, 8, 2, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 2, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4264713
     Finish loading 20 records
-    Time cost of this operator: 0.001269
+    Time cost of this operator: 0.001225
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index 9134d70da..a3306e8d6 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -329,10 +329,10 @@ Timing the untuned program
     ########## Build without Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  311.3     98.725   (1, 2, 10, 10, 3)  2       1        [311.3]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.052     0.968    (1, 6, 10, 10)     1       1        [3.052]           
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.969     0.307    (1, 1, 10, 10, 3)  1       1        [0.969]           
-    Total_time                                    -                                             315.32    -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  312.2     98.714   (1, 2, 10, 10, 3)  2       1        [312.2]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.112     0.984    (1, 6, 10, 10)     1       1        [3.112]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.956     0.302    (1, 1, 10, 10, 3)  1       1        [0.956]           
+    Total_time                                    -                                             316.268   -        -                  -       -        -                 
 
 
 
@@ -398,10 +398,10 @@ Timing the tuned program
     ########## Build with Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  216.9     98.598   (1, 1, 10, 10, 6)  2       1        [216.9]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       2.242     1.019    (1, 6, 10, 10)     1       1        [2.242]           
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.842     0.383    (1, 3, 10, 10, 1)  1       1        [0.842]           
-    Total_time                                    -                                             219.983   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  123.1     97.808   (1, 6, 10, 10, 1)  2       1        [123.1]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.777     1.412    (1, 6, 10, 10)     1       1        [1.777]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.981     0.78     (1, 1, 10, 10, 3)  1       1        [0.981]           
+    Total_time                                    -                                             125.858   -        -                  -       -        -                 
 
 
 
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
index 4e3de0e5a..4cf64dd68 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
@@ -225,7 +225,7 @@ take about **2 minutes** to download the Stanford Cars, while COCO 2017 validati
  .. code-block:: none
 
 
-    '/tmp/tmpf74f6dwp/images/random'
+    '/tmp/tmp21psypts/images/random'
 
 
 
@@ -325,8 +325,8 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
  .. code-block:: none
 
-    /tmp/tmpf74f6dwp/images/target contains 8144 images
-    /tmp/tmpf74f6dwp/images/random contains 5000 images
+    /tmp/tmp21psypts/images/target contains 8144 images
+    /tmp/tmp21psypts/images/random contains 5000 images
 
 
 
@@ -501,13 +501,13 @@ the time on our validation set).
  .. code-block:: none
 
     Epoch 1/3
-    328/328 - 47s - loss: 0.2253 - accuracy: 0.9230 - val_loss: 0.1339 - val_accuracy: 0.9607 - 47s/epoch - 144ms/step
+    328/328 - 49s - loss: 0.2119 - accuracy: 0.9272 - val_loss: 0.1349 - val_accuracy: 0.9558 - 49s/epoch - 148ms/step
     Epoch 2/3
-    328/328 - 44s - loss: 0.1040 - accuracy: 0.9599 - val_loss: 0.1150 - val_accuracy: 0.9660 - 44s/epoch - 134ms/step
+    328/328 - 44s - loss: 0.0957 - accuracy: 0.9631 - val_loss: 0.1137 - val_accuracy: 0.9600 - 44s/epoch - 135ms/step
     Epoch 3/3
-    328/328 - 44s - loss: 0.0688 - accuracy: 0.9748 - val_loss: 0.1372 - val_accuracy: 0.9524 - 44s/epoch - 133ms/step
+    328/328 - 44s - loss: 0.0609 - accuracy: 0.9766 - val_loss: 0.1349 - val_accuracy: 0.9543 - 44s/epoch - 134ms/step
 
-    <keras.callbacks.History object at 0x7fefc7708990>
+    <keras.callbacks.History object at 0x7f72f597eb90>
 
 
 
@@ -871,7 +871,7 @@ Arduino tutorial for how to do that `on GitHub <https://github.com/guberti/tvm-a
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 4 minutes  37.531 seconds)
+   **Total running time of the script:** ( 4 minutes  40.190 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_train.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index 0e2c1375f..460dcfd3d 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,16 +5,16 @@
 
 Computation times
 =================
-**05:32.139** total execution time for **how_to_work_with_microtvm** files:
+**05:37.256** total execution time for **how_to_work_with_microtvm** files:
 
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 04:37.531 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 04:40.190 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:42.954 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:44.632 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_aot.py` (``micro_aot.py``)                   | 00:08.291 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_aot.py` (``micro_aot.py``)                   | 00:08.899 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.361 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.533 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)             | 00:00.001 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index fd7afe62e..6212beb7b 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:43.662** total execution time for **how_to_work_with_relay** files:
+**00:42.933** total execution time for **how_to_work_with_relay** files:
 
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_pipeline_executor.py` (``using_pipeline_executor.py``) | 00:32.334 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_pipeline_executor.py` (``using_pipeline_executor.py``) | 00:33.124 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)           | 00:09.810 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)           | 00:08.427 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                             | 00:01.511 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                             | 00:01.375 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)                 | 00:00.007 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
index d66862786..07a1f8997 100644
--- a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
@@ -261,7 +261,7 @@ The following example customizes CUDA lowering rule for :code:`exp`.
  .. code-block:: none
 
 
-    <function my_cuda_math_rule at 0x7fefac70d9e0>
+    <function my_cuda_math_rule at 0x7f72cae55b90>
 
 
 
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index 6281e19f6..5e023f7f7 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,20 +5,20 @@
 
 Computation times
 =================
-**00:04.233** total execution time for **how_to_work_with_schedules** files:
+**00:08.550** total execution time for **how_to_work_with_schedules** files:
 
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:01.981 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:06.205 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:00.970 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:01.042 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.557 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.568 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.538 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.543 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.103 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.104 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.042 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.045 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.028 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index 7a8c4abab..018d0413a 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -347,7 +347,7 @@ The importing needs to happen before the tensorized GEMV being executed.
                  C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
       buffer_map = {A_1: A, B_1: B, C_1: C}
       preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmpkl4so9ff/input0.cc'\nsource_filename = \"/tmp/tmpkl4so9ff/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
+      attr [IterVar(i: int32, (nullptr), "DataPar", "")] "pragma_import_llvm" = "; ModuleID = '/tmp/tmp5hvqdeql/input0.cc'\nsource_filename = \"/tmp/tmp5hvqdeql/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca floa [...]
       for (i, 0, 1024) {
         for (j.outer: int32, 0, 32) {
           @tir.call_extern("gemv_update", @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index 5f40292c1..e89cafcde 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:22.118** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:22.812** total execution time for **topic_vta_tutorials_autotvm** files:
 
 +---------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:22.111 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:22.805 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)     | 00:00.007 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index d460020ba..9e115c324 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -291,7 +291,7 @@ The compilation steps are:
       DeprecationWarning,
     /workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the  new recommended usage.
       relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
-    resnet18_v1 inference graph built in 23.87s!
+    resnet18_v1 inference graph built in 24.02s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index 329a48d50..27e322d16 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -335,7 +335,7 @@ The compilation steps are:
       "target_host parameter is going to be deprecated. "
     /workspace/python/tvm/relay/build_module.py:348: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
       DeprecationWarning,
-    yolov3-tiny inference graph built in 16.76s!
+    yolov3-tiny inference graph built in 17.23s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index f470d1bc2..6a43bad2a 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**01:33.994** total execution time for **topic_vta_tutorials_frontend** files:
+**01:34.606** total execution time for **topic_vta_tutorials_frontend** files:
 
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:50.011 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:50.116 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:43.983 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:44.490 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index 85e6012fb..ae5a422f1 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:03.031** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.024** total execution time for **topic_vta_tutorials_optimize** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.634 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.605 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.398 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.419 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index f6a16e4f4..c7a7b8c20 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:00.739** total execution time for **topic_vta_tutorials** files:
+**00:00.772** total execution time for **topic_vta_tutorials** files:
 
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.394 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.414 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.345 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.357 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index 990defe94..cc028d189 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -203,6 +203,13 @@ trials, we can load the best schedule from the log file and apply it.
 
 
 
+.. rst-class:: sphx-glr-script-out
+
+ .. code-block:: none
+
+    .T
+
+
 
 
 
@@ -326,7 +333,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 94.939 ms
+    Execution time of this operator: 94.113 ms
 
 
 
@@ -442,6 +449,11 @@ Expression (TE) language that demonstrates how TVM can optimize computational
 operations.
 
 
+.. rst-class:: sphx-glr-timing
+
+   **Total running time of the script:** ( 1 minutes  14.111 seconds)
+
+
 .. _sphx_glr_download_tutorial_auto_scheduler_matmul_x86.py:
 
 .. only:: html
diff --git a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
index c40860e74..af7d1d08a 100644
--- a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
@@ -462,16 +462,16 @@ reduce variance, we take 5 measurements and average them.
     waiting for device...
     device available
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 9.54/9.54       result: MeasureResult(costs=(0.0281426766,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5830214023590088, timestamp=1662745122.6566837)       [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
-    No: 2   GFLOPS: 2.74/9.54       result: MeasureResult(costs=(0.09792412399999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7194128036499023, timestamp=1662745124.3898294)        [('tile_y', [-1, 4]), ('tile_x', [-1, 8])],None,32
-    No: 3   GFLOPS: 11.66/11.66     result: MeasureResult(costs=(0.0230133636,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5560965538024902, timestamp=1662745125.4644012)       [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
-    No: 4   GFLOPS: 1.60/11.66      result: MeasureResult(costs=(0.16744102,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.793675184249878, timestamp=1662745128.853025)   [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
-    No: 5   GFLOPS: 3.65/11.66      result: MeasureResult(costs=(0.0735420462,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3163845539093018, timestamp=1662745130.8439822)       [('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
-    No: 6   GFLOPS: 1.78/11.66      result: MeasureResult(costs=(0.15071351640000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5692601203918457, timestamp=1662745133.4538062)        [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
-    No: 7   GFLOPS: 0.86/11.66      result: MeasureResult(costs=(0.3117451672,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.104022741317749, timestamp=1662745138.6017158)        [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
-    No: 8   GFLOPS: 10.58/11.66     result: MeasureResult(costs=(0.025371573200000004,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.548820972442627, timestamp=1662745139.1702523)        [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
-    No: 9   GFLOPS: 1.88/11.66      result: MeasureResult(costs=(0.1427270144,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.382488250732422, timestamp=1662745141.671633) [('tile_y', [-1, 2]), ('tile_x', [-1, 2])],None,11
-    No: 10  GFLOPS: 2.74/11.66      result: MeasureResult(costs=(0.0978624558,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6717867851257324, timestamp=1662745143.4000833)       [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
+    No: 1   GFLOPS: 10.80/10.80     result: MeasureResult(costs=(0.024865492,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5379376411437988, timestamp=1662755642.5263865)        [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
+    No: 2   GFLOPS: 2.96/10.80      result: MeasureResult(costs=(0.09079858,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.6068522930145264, timestamp=1662755644.1461856) [('tile_y', [-1, 4]), ('tile_x', [-1, 8])],None,32
+    No: 3   GFLOPS: 11.73/11.73     result: MeasureResult(costs=(0.0228798156,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.554633378982544, timestamp=1662755645.2435226)        [('tile_y', [-1, 64]), ('tile_x', [-1, 32])],None,56
+    No: 4   GFLOPS: 1.86/11.73      result: MeasureResult(costs=(0.14449578619999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.4219093322753906, timestamp=1662755648.2837057)        [('tile_y', [-1, 1]), ('tile_x', [-1, 4])],None,20
+    No: 5   GFLOPS: 3.63/11.73      result: MeasureResult(costs=(0.0740425808,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3320631980895996, timestamp=1662755649.7460191)       [('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
+    No: 6   GFLOPS: 1.71/11.73      result: MeasureResult(costs=(0.15674306059999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.6395926475524902, timestamp=1662755652.9815567)        [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
+    No: 7   GFLOPS: 0.86/11.73      result: MeasureResult(costs=(0.311975595,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.114575624465942, timestamp=1662755658.1410728) [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
+    No: 8   GFLOPS: 10.65/11.73     result: MeasureResult(costs=(0.025200068799999996,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5516161918640137, timestamp=1662755658.7103574)       [('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
+    No: 9   GFLOPS: 1.61/11.73      result: MeasureResult(costs=(0.16654494520000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.769784927368164, timestamp=1662755661.6031222) [('tile_y', [-1, 2]), ('tile_x', [-1, 2])],None,11
+    No: 10  GFLOPS: 2.47/11.73      result: MeasureResult(costs=(0.10859884119999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.849473237991333, timestamp=1662755663.507864)  [('tile_y', [-1, 4]), ('tile_x', [-1, 4])],None,22
 
 
 
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index 66d444168..e1e0b0b8d 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -327,7 +327,7 @@ standard deviation.
 
  .. code-block:: none
 
-    {'mean': 515.8815004099985, 'median': 515.8990260999985, 'std': 1.398351159939487}
+    {'mean': 516.301202410009, 'median': 515.9303043500131, 'std': 1.4246596086627232}
 
 
 
@@ -563,30 +563,30 @@ the tuning data to.
 
     /workspace/python/tvm/driver/build_module.py:267: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.24/  17.24 GFLOPS | Progress: (4/20) | 6.52 s
    [Task  1/25]  Current/Best:    6.09/  17.24 GFLOPS | Progress: (8/20) | 9.61 s
    [Task  1/25]  Current/Best:   11.20/  22.20 GFLOPS | Progress: (12/20) | 12.12 s
    [Task  1/25]  Current/Best:   16.38/  22.22 GFLOPS | Progress: (16/20) | 13.83 s
    [Task  1/25]  Current/Best:   11.29/  23.13 GFLOPS | Progress: (20/20) | 15.61 s Done.
-
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.22/  12.30 GFLOPS | Progress: (4/20) | 3.75 s
    [Task  2/25]  Current/Best:   11.58/  18.26 GFLOPS | Progress: (8/20) | 5.07 s
    [Task  2/25]  Current/Best:   20.97/  20.97 GFLOPS | Progress: (12/20) | 6.43 s
    [Task  2/25]  Current/Best:   11.08/  20.97 GFLOPS | Progress: (16/20) | 7.70 s
    [Task  2/25]  Current/Best:   17.21/  20.97 GFLOPS | Progress: (20/20) | 9.32 s Done.
-
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.63/   9.94 GFLOPS | Progress: (4/20) | 5.92 s
    [Task  3/25]  Current/Best:   15.31/  16.85 GFLOPS | Progress: (8/20) | 7.90 s
    [Task  3/25]  Current/Best:   14.89/  16.85 GFLOPS | Progress: (12/20) | 9.65 s
    [Task  3/25]  Current/Best:    6.76/  22.36 GFLOPS | Progress: (16/20) | 11.63 s
    [Task  3/25]  Current/Best:   11.05/  22.36 GFLOPS | Progress: (20/20) | 16.28 s Done.
-
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.03/  18.48 GFLOPS | Progress: (4/20) | 2.43 s
    [Task  4/25]  Current/Best:    6.59/  18.48 GFLOPS | Progress: (8/20) | 6.92 s
    [Task  4/25]  Current/Best:   20.86/  20.86 GFLOPS | Progress: (12/20) | 11.63 s
    [Task  4/25]  Current/Best:   16.52/  20.86 GFLOPS | Progress: (16/20) | 13.93 s
    [Task  4/25]  Current/Best:   12.51/  20.86 GFLOPS | Progress: (20/20) | 15.88 s Done.
-
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    8.62/   9.33 GFLOPS | Progress: (4/20) | 2.70 s
    [Task  5/25]  Current/Best:   11.72/  11.72 GFLOPS | Progress: (8/20) | 4.86 s
    [Task  5/25]  Current/Best:   10.58/  17.82 GFLOPS | Progress: (12/20) | 8.01 s
    [Task  5/25]  Current/Best:   10.64/  22.06 GFLOPS | Progress: (16/20) | 9.49 s
    [Task  5/25]  Current/Best:   12.14/  22.06 GFLOPS | Progress: (20/20) | 11.36 s Done.
-
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.02/  19.78 GFLOPS | Progress: (4/20) | 4.10 s
    [Task  6/25]  Current/Best:   18.80/  19.78 GFLOPS | Progress: (8/20) | 5.92 s
    [Task  6/25]  Current/Best:   13.15/  19.78 GFLOPS | Progress: (12/20) | 7.93 s
    [Task  6/25]  Current/Best:   19.21/  19.78 GFLOPS | Progress: (16/20) | 10.21 s
    [Task  6/25]  Current/Best:    3.67/  19.78 GFLOPS | Progress: (20/20) | 12.84 s Done.
-
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:    9.71/  12.08 GFLOPS | Progress: (4/20) | 3.70 s
    [Task  7/25]  Current/Best:   19.41/  19.97 GFLOPS | Progress: (8/20) | 5.24 s
    [Task  7/25]  Current/Best:   16.01/  19.97 GFLOPS | Progress: (12/20) | 7.17 s
    [Task  7/25]  Current/Best:   11.81/  19.97 GFLOPS | Progress: (16/20) | 9.29 s
    [Task  7/25]  Current/Best:    5.93/  20.34 GFLOPS | Progress: (20/20) | 11.82 s Done.
-
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:    9.45/  13.27 GFLOPS | Progress: (4/20) | 3.05 s
    [Task  8/25]  Current/Best:    9.51/  13.27 GFLOPS | Progress: (8/20) | 7.88 s
    [Task  8/25]  Current/Best:   12.74/  13.27 GFLOPS | Progress: (12/20) | 14.19 s
    [Task  8/25]  Current/Best:   18.93/  18.93 GFLOPS | Progress: (16/20) | 16.32 s
    [Task  8/25]  Current/Best:   18.86/  18.93 GFLOPS | Progress: (20/20) | 23.00 s Done.
-
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.30/  14.30 GFLOPS | Progress: (4/20) | 12.02 s
    [Task  9/25]  Current/Best:   22.92/  22.92 GFLOPS | Progress: (8/20) | 13.86 s
    [Task  9/25]  Current/Best:    8.02/  22.92 GFLOPS | Progress: (12/20) | 16.26 s
    [Task  9/25]  Current/Best:   17.94/  22.92 GFLOPS | Progress: (16/20) | 18.98 s
    [Task  9/25]  Current/Best:    9.16/  22.92 GFLOPS | Progress: (20/20) | 26.84 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.17/  18.17 GFLOPS | Progress: (4/20) | 2.60 s
    [Task 10/25]  Current/Best:   15.71/  18.17 GFLOPS | Progress: (8/20) | 4.25 s
    [Task 10/25]  Current/Best:   11.03/  18.82 GFLOPS | Progress: (12/20) | 5.80 s
    [Task 10/25]  Current/Best:   19.00/  20.18 GFLOPS | Progress: (16/20) | 6.93 s
    [Task 10/25]  Current/Best:    8.61/  20.18 GFLOPS | Progress: (20/20
 ) | 8.48 s Done.
-
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   10.71/  18.21 GFLOPS | Progress: (4/20) | 3.46 s
    [Task 11/25]  Current/Best:   14.86/  18.21 GFLOPS | Progress: (8/20) | 6.26 s
    [Task 11/25]  Current/Best:   15.85/  18.21 GFLOPS | Progress: (12/20) | 8.33 s
    [Task 11/25]  Current/Best:   11.87/  20.48 GFLOPS | Progress: (16/20) | 11.22 s
    [Task 11/25]  Current/Best:   18.57/  20.48 GFLOPS | Progress: (20/20) | 13.27 s Done.
-
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.71/  17.80 GFLOPS | Progress: (4/20) | 5.52 s
    [Task 12/25]  Current/Best:    4.97/  17.80 GFLOPS | Progress: (8/20) | 9.34 s
    [Task 12/25]  Current/Best:   18.80/  18.80 GFLOPS | Progress: (12/20) | 11.39 s
    [Task 12/25]  Current/Best:   14.34/  18.80 GFLOPS | Progress: (16/20) | 14.23 s
    [Task 12/25]  Current/Best:   15.02/  18.80 GFLOPS | Progress: (20/20) | 16.23 s Done.
-
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.48/  17.17 GFLOPS | Progress: (4/20) | 3.80 s
    [Task 13/25]  Current/Best:   14.39/  20.59 GFLOPS | Progress: (8/20) | 6.29 s
    [Task 13/25]  Current/Best:   18.77/  21.29 GFLOPS | Progress: (12/20) | 9.25 s
    [Task 13/25]  Current/Best:   12.19/  21.29 GFLOPS | Progress: (16/20) | 12.71 s
    [Task 13/25]  Current/Best:   17.65/  21.29 GFLOPS | Progress: (20/20) | 15.02 s Done.
-
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   12.02/  13.20 GFLOPS | Progress: (4/20) | 3.34 s
    [Task 14/25]  Current/Best:    6.08/  13.20 GFLOPS | Progress: (8/20) | 5.57 s
    [Task 14/25]  Current/Best:   19.30/  19.30 GFLOPS | Progress: (12/20) | 8.15 s
    [Task 14/25]  Current/Best:   14.97/  19.30 GFLOPS | Progress: (16/20) | 9.81 s Done.
-
    [Task 14/25]  Current/Best:   16.76/  19.30 GFLOPS | Progress: (20/20) | 11.56 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   15.47/  17.24 GFLOPS | Progress: (4/20) | 2.76 s
    [Task 15/25]  Current/Best:   12.64/  17.49 GFLOPS | Progress: (8/20) | 4.11 s
    [Task 15/25]  Current/Best:    9.91/  20.00 GFLOPS | Progress: (12/20) | 6.25 s
    [Task 15/25]  Current/Best:   20.36/  20.36 GFLOPS | Progress: (16/20) | 9.24 s
    [Task 15/25]  Current/Best:    9.34/  20.36 GFLOPS | Progress: (20/20) | 10.26 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   18.46/  18.46 GFLOPS | Progress: (4/20) | 3.06 s
    [Task 16/25]  Current/Best:    3.03/  18.46 GFLOPS | Progress: (8/20) | 4.69 s
    [Task 16/25]  Current/Best:   17.94/  19.19 GFLOPS | Progress: (12/20) | 5.93 s
    [Task 16/25]  Current/Best:   17.44/  19.19 GFLOPS | Progress: (16/20) |
  7.33 s
    [Task 16/25]  Current/Best:   10.26/  21.18 GFLOPS | Progress: (20/20) | 9.42 s Done.
-
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   12.73/  16.04 GFLOPS | Progress: (4/20) | 4.84 s
    [Task 17/25]  Current/Best:   12.68/  22.78 GFLOPS | Progress: (8/20) | 7.66 s
    [Task 17/25]  Current/Best:   16.47/  22.78 GFLOPS | Progress: (12/20) | 9.80 s
    [Task 17/25]  Current/Best:   16.39/  22.78 GFLOPS | Progress: (16/20) | 11.97 s
    [Task 17/25]  Current/Best:    9.93/  22.78 GFLOPS | Progress: (20/20) | 14.11 s Done.
-
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   10.73/  16.77 GFLOPS | Progress: (4/20) | 3.83 s
    [Task 18/25]  Current/Best:   10.54/  19.15 GFLOPS | Progress: (8/20) | 7.35 s
    [Task 18/25]  Current/Best:   19.09/  19.15 GFLOPS | Progress: (12/20) | 9.31 s
    [Task 18/25]  Current/Best:    9.49/  19.15 GFLOPS | Progress: (16/20) | 12.96 s
    [Task 18/25]  Current/Best:   20.57/  20.57 GFLOPS | Progress: (20/20) | 14.51 s Done.
-
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    6.87/  19.64 GFLOPS | Progress: (4/20) | 6.15 s
    [Task 19/25]  Current/Best:    2.69/  19.64 GFLOPS | Progress: (8/20) | 9.43 s
    [Task 19/25]  Current/Best:   18.27/  20.02 GFLOPS | Progress: (12/20) | 12.25 s
    [Task 19/25]  Current/Best:   13.39/  20.02 GFLOPS | Progress: (16/20) | 15.15 s
    [Task 19/25]  Current/Best:    2.69/  22.27 GFLOPS | Progress: (20/20) | 18.01 s Done.
-
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    7.40/  15.13 GFLOPS | Progress: (4/20) | 3.42 s Done.
+
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   17.37/  17.37 GFLOPS | Progress: (4/20) | 6.62 s
    [Task  1/25]  Current/Best:    6.09/  17.37 GFLOPS | Progress: (8/20) | 9.66 s
    [Task  1/25]  Current/Best:   11.16/  22.26 GFLOPS | Progress: (12/20) | 12.21 s
    [Task  1/25]  Current/Best:   16.37/  22.26 GFLOPS | Progress: (16/20) | 13.92 s
    [Task  1/25]  Current/Best:   11.32/  23.58 GFLOPS | Progress: (20/20) | 15.73 s Done.
+
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   12.09/  12.50 GFLOPS | Progress: (4/20) | 3.89 s
    [Task  2/25]  Current/Best:   12.65/  18.34 GFLOPS | Progress: (8/20) | 5.21 s
    [Task  2/25]  Current/Best:   21.01/  21.01 GFLOPS | Progress: (12/20) | 6.55 s
    [Task  2/25]  Current/Best:   11.38/  21.01 GFLOPS | Progress: (16/20) | 7.84 s
    [Task  2/25]  Current/Best:   18.29/  21.01 GFLOPS | Progress: (20/20) | 9.50 s Done.
+
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:    1.62/  10.04 GFLOPS | Progress: (4/20) | 5.99 s
    [Task  3/25]  Current/Best:   15.28/  16.83 GFLOPS | Progress: (8/20) | 7.97 s
    [Task  3/25]  Current/Best:   14.91/  16.83 GFLOPS | Progress: (12/20) | 9.72 s
    [Task  3/25]  Current/Best:    6.79/  23.20 GFLOPS | Progress: (16/20) | 11.76 s
    [Task  3/25]  Current/Best:   10.93/  23.20 GFLOPS | Progress: (20/20) | 16.45 s Done.
+
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    9.04/  18.57 GFLOPS | Progress: (4/20) | 2.49 s
    [Task  4/25]  Current/Best:    6.36/  18.57 GFLOPS | Progress: (8/20) | 7.38 s
    [Task  4/25]  Current/Best:   20.43/  20.43 GFLOPS | Progress: (12/20) | 12.40 s
    [Task  4/25]  Current/Best:   14.32/  20.43 GFLOPS | Progress: (16/20) | 14.87 s
    [Task  4/25]  Current/Best:   12.42/  20.43 GFLOPS | Progress: (20/20) | 16.89 s Done.
+
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:    9.34/   9.79 GFLOPS | Progress: (4/20) | 2.71 s
    [Task  5/25]  Current/Best:   11.74/  11.74 GFLOPS | Progress: (8/20) | 4.80 s
    [Task  5/25]  Current/Best:    9.48/  17.95 GFLOPS | Progress: (12/20) | 8.11 s
    [Task  5/25]  Current/Best:   11.60/  20.70 GFLOPS | Progress: (16/20) | 9.59 s
    [Task  5/25]  Current/Best:   11.88/  20.88 GFLOPS | Progress: (20/20) | 11.52 s Done.
+
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   12.05/  19.93 GFLOPS | Progress: (4/20) | 4.25 s
    [Task  6/25]  Current/Best:   18.75/  19.93 GFLOPS | Progress: (8/20) | 6.05 s
    [Task  6/25]  Current/Best:   12.90/  19.93 GFLOPS | Progress: (12/20) | 8.09 s
    [Task  6/25]  Current/Best:   19.25/  19.93 GFLOPS | Progress: (16/20) | 10.41 s
    [Task  6/25]  Current/Best:    3.75/  19.93 GFLOPS | Progress: (20/20) | 13.01 s Done.
+
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:    9.67/  12.10 GFLOPS | Progress: (4/20) | 3.83 s
    [Task  7/25]  Current/Best:   19.35/  20.14 GFLOPS | Progress: (8/20) | 5.40 s
    [Task  7/25]  Current/Best:   15.71/  20.14 GFLOPS | Progress: (12/20) | 7.35 s
    [Task  7/25]  Current/Best:   12.14/  20.14 GFLOPS | Progress: (16/20) | 9.45 s
    [Task  7/25]  Current/Best:    6.15/  20.14 GFLOPS | Progress: (20/20) | 12.00 s Done.
+
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   10.52/  14.07 GFLOPS | Progress: (4/20) | 3.04 s
    [Task  8/25]  Current/Best:    9.83/  14.07 GFLOPS | Progress: (8/20) | 8.34 s
    [Task  8/25]  Current/Best:   13.39/  14.07 GFLOPS | Progress: (12/20) | 15.09 s
    [Task  8/25]  Current/Best:   18.93/  18.93 GFLOPS | Progress: (16/20) | 17.22 s
    [Task  8/25]  Current/Best:   18.27/  18.93 GFLOPS | Progress: (20/20) | 24.49 s Done.
+
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   14.06/  14.06 GFLOPS | Progress: (4/20) | 12.05 s
    [Task  9/25]  Current/Best:   22.43/  22.43 GFLOPS | Progress: (8/20) | 13.86 s
    [Task  9/25]  Current/Best:    7.97/  22.43 GFLOPS | Progress: (12/20) | 16.48 s
    [Task  9/25]  Current/Best:   17.58/  22.43 GFLOPS | Progress: (16/20) | 19.41 s
    [Task  9/25]  Current/Best:    8.89/  22.43 GFLOPS | Progress: (20/20) | 28.24 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:   18.42/  18.42 GFLOPS | Progress: (4/20) | 2.69 s
    [Task 10/25]  Current/Best:   15.77/  18.42 GFLOPS | Progress: (8/20) | 4.36 s
    [Task 10/25]  Current/Best:   11.59/  19.04 GFLOPS | Progress: (12/20) | 5.94 s
    [Task 10/25]  Current/Best:   18.91/  20.48 GFLOPS | Progress: (16/20) | 7.07 s
    [Task 10/25]  Current/Best:    8.72/  20.48 GFLOPS | Progress: (20/20
 ) | 8.63 s Done.
+
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   10.74/  18.09 GFLOPS | Progress: (4/20) | 3.56 s
    [Task 11/25]  Current/Best:   14.87/  18.09 GFLOPS | Progress: (8/20) | 6.46 s
    [Task 11/25]  Current/Best:   15.91/  18.09 GFLOPS | Progress: (12/20) | 8.60 s
    [Task 11/25]  Current/Best:   11.83/  20.53 GFLOPS | Progress: (16/20) | 11.59 s
    [Task 11/25]  Current/Best:   18.49/  20.53 GFLOPS | Progress: (20/20) | 13.75 s Done.
+
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    7.74/  18.11 GFLOPS | Progress: (4/20) | 5.99 s
    [Task 12/25]  Current/Best:    5.14/  18.11 GFLOPS | Progress: (8/20) | 10.03 s
    [Task 12/25]  Current/Best:   19.20/  19.20 GFLOPS | Progress: (12/20) | 12.04 s
    [Task 12/25]  Current/Best:   12.96/  19.20 GFLOPS | Progress: (16/20) | 15.08 s
    [Task 12/25]  Current/Best:   15.20/  19.20 GFLOPS | Progress: (20/20) | 17.07 s Done.
+
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.39/  17.18 GFLOPS | Progress: (4/20) | 3.92 s
    [Task 13/25]  Current/Best:   15.24/  20.62 GFLOPS | Progress: (8/20) | 6.60 s
    [Task 13/25]  Current/Best:   17.83/  21.51 GFLOPS | Progress: (12/20) | 9.68 s
    [Task 13/25]  Current/Best:   12.16/  21.51 GFLOPS | Progress: (16/20) | 13.22 s
    [Task 13/25]  Current/Best:   17.62/  21.51 GFLOPS | Progress: (20/20) | 15.65 s Done.
+
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   12.15/  13.23 GFLOPS | Progress: (4/20) | 3.56 s
    [Task 14/25]  Current/Best:    6.08/  13.23 GFLOPS | Progress: (8/20) | 5.77 s
    [Task 14/25]  Current/Best:   19.51/  19.51 GFLOPS | Progress: (12/20) | 8.54 s
    [Task 14/25]  Current/Best:   16.24/  19.51 GFLOPS | Progress: (16/20) | 10.21 s Done.
+
    [Task 14/25]  Current/Best:   16.93/  19.51 GFLOPS | Progress: (20/20) | 11.99 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   15.56/  16.42 GFLOPS | Progress: (4/20) | 2.85 s
    [Task 15/25]  Current/Best:   12.60/  17.71 GFLOPS | Progress: (8/20) | 4.19 s
    [Task 15/25]  Current/Best:    9.81/  21.44 GFLOPS | Progress: (12/20) | 6.53 s
    [Task 15/25]  Current/Best:   19.49/  21.44 GFLOPS | Progress: (16/20) | 9.77 s
    [Task 15/25]  Current/Best:    9.22/  21.44 GFLOPS | Progress: (20/20) | 10.82 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   19.15/  19.15 GFLOPS | Progress: (4/20) | 3.10 s
    [Task 16/25]  Current/Best:    3.01/  19.15 GFLOPS | Progress: (8/20) | 4.74 s
    [Task 16/25]  Current/Best:   17.11/  19.32 GFLOPS | Progress: (12/20) | 6.00 s
    [Task 16/25]  Current/Best:   18.17/  19.32 GFLOPS | Progress: (16/20) |
  7.39 s
    [Task 16/25]  Current/Best:    9.86/  19.32 GFLOPS | Progress: (20/20) | 9.62 s Done.
+
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   12.66/  15.99 GFLOPS | Progress: (4/20) | 4.98 s
    [Task 17/25]  Current/Best:   12.67/  22.62 GFLOPS | Progress: (8/20) | 7.96 s
    [Task 17/25]  Current/Best:   16.33/  22.62 GFLOPS | Progress: (12/20) | 10.08 s
    [Task 17/25]  Current/Best:   16.41/  22.62 GFLOPS | Progress: (16/20) | 12.35 s
    [Task 17/25]  Current/Best:    9.72/  22.62 GFLOPS | Progress: (20/20) | 14.58 s Done.
+
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   11.03/  16.79 GFLOPS | Progress: (4/20) | 3.96 s
    [Task 18/25]  Current/Best:   10.61/  18.34 GFLOPS | Progress: (8/20) | 7.75 s
    [Task 18/25]  Current/Best:   18.89/  18.89 GFLOPS | Progress: (12/20) | 9.74 s
    [Task 18/25]  Current/Best:    9.74/  18.89 GFLOPS | Progress: (16/20) | 13.71 s
    [Task 18/25]  Current/Best:   20.53/  20.53 GFLOPS | Progress: (20/20) | 15.29 s Done.
+
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    7.05/  19.12 GFLOPS | Progress: (4/20) | 6.35 s
    [Task 19/25]  Current/Best:    2.69/  19.12 GFLOPS | Progress: (8/20) | 9.68 s
    [Task 19/25]  Current/Best:   17.90/  20.09 GFLOPS | Progress: (12/20) | 12.66 s
    [Task 19/25]  Current/Best:   13.46/  20.09 GFLOPS | Progress: (16/20) | 15.69 s
    [Task 19/25]  Current/Best:    2.69/  21.84 GFLOPS | Progress: (20/20) | 18.50 s Done.
+
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:    9.04/  15.04 GFLOPS | Progress: (4/20) | 3.42 s Done.
      Done.
-
    [Task 20/25]  Current/Best:    9.51/  15.13 GFLOPS | Progress: (8/20) | 6.94 s
    [Task 20/25]  Current/Best:    2.31/  15.13 GFLOPS | Progress: (12/20) | 10.85 s
    [Task 20/25]  Current/Best:   10.99/  15.13 GFLOPS | Progress: (16/20) | 14.63 s
    [Task 20/25]  Current/Best:   11.70/  21.48 GFLOPS | Progress: (20/20) | 16.74 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.32/  17.59 GFLOPS | Progress: (4/20) | 3.28 s
    [Task 21/25]  Current/Best:   14.46/  17.59 GFLOPS | Progress: (8/20) | 4.83 s
    [Task 21/25]  Current/Best:    1.61/  17.59 GFLOPS | Progress: (12/20) | 6.96 s
    [Task 21/25]  Current/Best:   15.94/  17.59 GFLOPS | Progress: (16/20) | 10.45 s
    [Task 21/25]  Current/Best:    4.40/  17.59 GFLOPS | Progress: (20/20) | 17.85 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.70/  16.75 GFLOPS | Progress: (4/20
 ) | 2.73 s
    [Task 22/25]  Current/Best:    9.17/  21.18 GFLOPS | Progress: (8/20) | 4.70 s
    [Task 22/25]  Current/Best:   19.86/  21.18 GFLOPS | Progress: (12/20) | 7.02 s
    [Task 22/25]  Current/Best:   15.16/  21.18 GFLOPS | Progress: (16/20) | 9.09 s
    [Task 22/25]  Current/Best:   13.17/  21.18 GFLOPS | Progress: (20/20) | 10.85 s Done.
-
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   16.38/  19.75 GFLOPS | Progress: (4/20) | 3.32 s
    [Task 23/25]  Current/Best:   14.07/  19.75 GFLOPS | Progress: (8/20) | 6.71 s
    [Task 23/25]  Current/Best:   20.37/  21.40 GFLOPS | Progress: (12/20) | 8.55 s
    [Task 23/25]  Current/Best:    6.37/  21.40 GFLOPS | Progress: (16/20) | 15.69 s
    [Task 23/25]  Current/Best:    7.32/  21.40 GFLOPS | Progress: (20/20) | 19.94 s Done.
-
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.35/   8.35 GFLOPS | Progress: (4/20) | 11.83 s
    [Task 24/25]  Current/Best:    3.06/   8.35 GFLOPS | Progress: (8/20) | 23.11 s
    [Task 24/25]  Current/Best:    3.44/   8.35 GFLOPS | Progress: (12/20) | 33.84 s Done.
-
    [Task 24/25]  Current/Best:    5.75/   8.63 GFLOPS | Progress: (16/20) | 39.28 s
    [Task 24/25]  Current/Best:    2.94/   8.63 GFLOPS | Progress: (20/20) | 45.38 s Done.
-
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.55/   2.90 GFLOPS | Progress: (4/20) | 11.64 s
    [Task 25/25]  Current/Best:    5.79/   7.79 GFLOPS | Progress: (8/20) | 22.91 s
    [Task 25/25]  Current/Best:    5.79/   7.79 GFLOPS | Progress: (12/20) | 34.22 s
    [Task 25/25]  Current/Best:    5.74/   8.24 GFLOPS | Progress: (16/20) | 36.05 s
    [Task 25/25]  Current/Best:    2.85/   8.82 GFLOPS | Progress: (20/20) | 46.71 s
+
    [Task 20/25]  Current/Best:   10.26/  15.04 GFLOPS | Progress: (8/20) | 6.85 s
    [Task 20/25]  Current/Best:    2.31/  15.04 GFLOPS | Progress: (12/20) | 10.86 s
    [Task 20/25]  Current/Best:   11.07/  15.04 GFLOPS | Progress: (16/20) | 14.87 s
    [Task 20/25]  Current/Best:   11.88/  21.35 GFLOPS | Progress: (20/20) | 17.01 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    6.31/  17.68 GFLOPS | Progress: (4/20) | 3.34 s
    [Task 21/25]  Current/Best:   14.23/  17.68 GFLOPS | Progress: (8/20) | 5.01 s
    [Task 21/25]  Current/Best:    1.61/  17.68 GFLOPS | Progress: (12/20) | 7.18 s
    [Task 21/25]  Current/Best:   16.05/  17.68 GFLOPS | Progress: (16/20) | 10.76 s
    [Task 21/25]  Current/Best:    4.46/  17.68 GFLOPS | Progress: (20/20) | 18.21 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    2.69/  16.86 GFLOPS | Progress: (4/20
 ) | 2.73 s
    [Task 22/25]  Current/Best:    9.28/  19.84 GFLOPS | Progress: (8/20) | 4.79 s
    [Task 22/25]  Current/Best:   19.45/  19.84 GFLOPS | Progress: (12/20) | 7.23 s
    [Task 22/25]  Current/Best:   15.17/  19.84 GFLOPS | Progress: (16/20) | 9.43 s
    [Task 22/25]  Current/Best:   13.19/  19.84 GFLOPS | Progress: (20/20) | 11.24 s Done.
+
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   16.25/  19.27 GFLOPS | Progress: (4/20) | 3.38 s
    [Task 23/25]  Current/Best:   14.06/  19.84 GFLOPS | Progress: (8/20) | 6.75 s
    [Task 23/25]  Current/Best:   20.22/  20.72 GFLOPS | Progress: (12/20) | 8.66 s
    [Task 23/25]  Current/Best:    5.96/  20.72 GFLOPS | Progress: (16/20) | 16.00 s
    [Task 23/25]  Current/Best:    7.16/  20.72 GFLOPS | Progress: (20/20) | 20.35 s Done.
+
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    8.06/   8.06 GFLOPS | Progress: (4/20) | 11.91 s
    [Task 24/25]  Current/Best:    1.71/   8.06 GFLOPS | Progress: (8/20) | 23.03 s
    [Task 24/25]  Current/Best:    2.88/   8.06 GFLOPS | Progress: (12/20) | 34.64 s Done.
+
    [Task 24/25]  Current/Best:    6.44/   8.62 GFLOPS | Progress: (16/20) | 40.51 s
    [Task 24/25]  Current/Best:    2.86/   8.62 GFLOPS | Progress: (20/20) | 46.60 s Done.
+
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    1.54/   2.83 GFLOPS | Progress: (4/20) | 11.69 s
    [Task 25/25]  Current/Best:    5.62/   7.59 GFLOPS | Progress: (8/20) | 23.03 s
    [Task 25/25]  Current/Best:    5.88/   7.59 GFLOPS | Progress: (12/20) | 34.55 s
    [Task 25/25]  Current/Best:    5.60/   9.10 GFLOPS | Progress: (16/20) | 36.37 s
    [Task 25/25]  Current/Best:    2.79/   9.10 GFLOPS | Progress: (20/20) | 47.13 s
 
 
 
@@ -748,8 +748,8 @@ improvement in comparing the optimized model to the unoptimized model.
 
  .. code-block:: none
 
-    optimized: {'mean': 410.5049117899989, 'median': 410.53536645000577, 'std': 0.5414019445738604}
-    unoptimized: {'mean': 515.8815004099985, 'median': 515.8990260999985, 'std': 1.398351159939487}
+    optimized: {'mean': 422.23513323999214, 'median': 422.2725586500019, 'std': 0.7377336577128324}
+    unoptimized: {'mean': 516.301202410009, 'median': 515.9303043500131, 'std': 1.4246596086627232}
 
 
 
@@ -772,7 +772,7 @@ profiling/benchmarking.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 10 minutes  21.250 seconds)
+   **Total running time of the script:** ( 10 minutes  42.126 seconds)
 
 
 .. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index de74183bb..4e9989b9e 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -282,7 +282,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.233e-07 secs/op
+    1.247e-07 secs/op
 
 
 
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index 3189e0ed4..0a90cb588 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -263,7 +263,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, placeholder(a, 0x213378d0)), stage(b, placeholder(b, 0x2514c800)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(mi [...]
+    [stage(a, placeholder(a, 0x207b98c0)), stage(b, placeholder(b, 0x213e5bb0)), stage(T_add, compute(T_add, body=[(a[ax0, ax1, ax2] + b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(min=0, ext=10))], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[(a[ax0, ax1, ax2]*b[ax1, ax2])], axis=[iter_var(ax0, range(min=0, ext=100)), iter_var(ax1, range(min=0, ext=10)), iter_var(ax2, range(mi [...]
 
 
 
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index ca4282051..0666a483d 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,32 +5,32 @@
 
 Computation times
 =================
-**13:10.663** total execution time for **tutorial** files:
+**13:54.428** total execution time for **tutorial** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 10:21.250 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 10:42.126 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 01:02.037 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 01:14.111 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 00:49.625 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 00:59.994 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:31.346 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:32.115 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:24.316 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:24.648 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:01.206 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.729 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.705 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:00.536 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.168 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.158 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.005 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_uma.py` (``uma.py``)                                             | 00:00.001 | 0.0 MB |
-+------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_install.py` (``install.py``)                                     | 00:00.001 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_uma.py` (``uma.py``)                                             | 00:00.002 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)                             | 00:00.001 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)   | 00:00.001 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_tutorial_install.py` (``install.py``)                                     | 00:00.001 | 0.0 MB |
++------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index e7bb44808..3103e6bd3 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -301,7 +301,7 @@ helper function to run a profile of the TVM generated code.
 
  .. code-block:: none
 
-    Numpy running time: 0.000007
+    Numpy running time: 0.000008
     naive: 0.000007
 
 
@@ -512,10 +512,10 @@ We can now compare the different schedules
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                   numpy    6.5554399998291045e-06                   1.0
-                   naive    6.6549000000000004e-06    1.0151721318742126
-                parallel              7.8687e-06      1.2003313279055459
-                  vector    2.4573299999999997e-05    3.7485355675043333
+                   numpy    7.635150000169233e-06                    1.0
+                   naive              6.6651e-06      0.8729494508755254
+                parallel               8.218e-06      1.0763377274602133
+                  vector    2.4551799999999998e-05    3.2156277217154616
 
 
 
@@ -936,7 +936,7 @@ matrix multiplication.
 
  .. code-block:: none
 
-    Numpy running time: 0.019151
+    Numpy running time: 0.019348
 
 
 
@@ -996,7 +996,7 @@ optimizations.
 
     /workspace/python/tvm/driver/build_module.py:267: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    none: 3.482885
+    none: 3.252881
 
 
 
@@ -1101,7 +1101,7 @@ schedule.
 
     /workspace/python/tvm/driver/build_module.py:267: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    blocking: 0.306596
+    blocking: 0.336601
 
 
 
@@ -1199,7 +1199,7 @@ already cache friendly from our previous optimizations.
 
     /workspace/python/tvm/driver/build_module.py:267: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    vectorization: 0.340027
+    vectorization: 0.358317
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1275,7 +1275,7 @@ more cache friendly.
 
     /workspace/python/tvm/driver/build_module.py:267: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    loop permutation: 0.121665
+    loop permutation: 0.137859
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1376,7 +1376,7 @@ optimized schedule.
 
     /workspace/python/tvm/driver/build_module.py:267: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    array packing: 0.108316
+    array packing: 0.109896
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1471,7 +1471,7 @@ to `C` when all the block results are ready.
 
     /workspace/python/tvm/driver/build_module.py:267: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    block caching: 0.110654
+    block caching: 0.112883
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1559,7 +1559,7 @@ of thread-level parallelization.
 
     /workspace/python/tvm/driver/build_module.py:267: UserWarning: target_host parameter is going to be deprecated. Please pass in tvm.target.Target(target, host=target_host) instead.
       "target_host parameter is going to be deprecated. "
-    parallelization: 0.146965
+    parallelization: 0.147974
     @main = primfn(A_1: handle, B_1: handle, C_1: handle) -> ()
       attr = {"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}
       buffers = {A: Buffer(A_2: Pointer(float32), float32, [1048576], []),
@@ -1640,13 +1640,13 @@ working, we can compare the results.
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                    none            3.4828853204                     1.0
-                blocking     0.30659591070000003     0.08802928678248537
-           vectorization            0.3400272215     0.09762802682832772
-        loop permutation     0.12166497360000002    0.034932236467098816
-           array packing            0.1083158621     0.03109946269708364
-           block caching     0.11065377800000001    0.031770721060460215
-         parallelization            0.1469650077    0.042196338432159884
+                    none            3.2528813825                     1.0
+                blocking     0.33660125929999996     0.10347787690964165
+           vectorization            0.3583168853     0.11015368934990669
+        loop permutation            0.1378592643    0.042380661355087085
+           array packing            0.1098959464     0.03378418499709926
+           block caching     0.11288335489999998     0.03470257338841035
+         parallelization     0.14797402939999998     0.04549013997131203
 
 
 
@@ -1686,11 +1686,6 @@ operations with tunable parameters that allows you to automatically optimize
 the computation for specific platforms.
 
 
-.. rst-class:: sphx-glr-timing
-
-   **Total running time of the script:** ( 1 minutes  2.037 seconds)
-
-
 .. _sphx_glr_download_tutorial_tensor_expr_get_started.py:
 
 .. only:: html
diff --git a/docs/commit_hash b/docs/commit_hash
index ddb269f08..59ba769cb 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-75969647fdf5e9f9b60635d1409952c97a29f0e4
+029fa462d22ce3c75bc5ea530eece999a160c05b
diff --git a/docs/how_to/compile_models/from_darknet.html b/docs/how_to/compile_models/from_darknet.html
index 7cfe23728..25fb46014 100644
--- a/docs/how_to/compile_models/from_darknet.html
+++ b/docs/how_to/compile_models/from_darknet.html
@@ -574,7 +574,7 @@ class:[&#39;truck 0.9266&#39;] left:471 top:83 right:689 bottom:169
 class:[&#39;bicycle 0.9984&#39;] left:111 top:113 right:577 bottom:447
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  3.774 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  6.919 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-darknet-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7716f96385bd5abb6e822041e285be54/from_darknet.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_darknet.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_keras.html b/docs/how_to/compile_models/from_keras.html
index 003eb47dd..7cba98c7d 100644
--- a/docs/how_to/compile_models/from_keras.html
+++ b/docs/how_to/compile_models/from_keras.html
@@ -493,7 +493,7 @@ pip install -U tensorflow --user
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Relay top-1 id: 285, class name: Egyptian cat
 
 1/1 [==============================] - ETA: 0s
-1/1 [==============================] - 1s 987ms/step
+1/1 [==============================] - 1s 1s/step
 Keras top-1 id: 285, class name: Egyptian cat
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index 9589849f6..e7ed40624 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -427,7 +427,7 @@ to download the full example code</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;x&quot;</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">x</span><span class="o">.</span><span class="n">shape</span></a><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip4b3d6bf1-8db2-4a06-a57c-2b0e765bf118 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip042cf72f-1210-4030-b3cf-e4e9538b1927 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
 x (1, 3, 224, 224)
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index 48c52df3c..f7db02e04 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -435,12 +435,13 @@ Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdo
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip&quot; to /workspace/.oneflow/flowvision_cache/resnet18.zip
 
   0%|          | 0.00/41.5M [00:00&lt;?, ?B/s]
- 19%|#9        | 7.99M/41.5M [00:00&lt;00:00, 61.9MB/s]
- 35%|###4      | 14.3M/41.5M [00:00&lt;00:00, 58.1MB/s]
- 48%|####7     | 19.9M/41.5M [00:00&lt;00:00, 53.4MB/s]
- 77%|#######7  | 32.0M/41.5M [00:00&lt;00:00, 56.8MB/s]
- 93%|#########3| 38.6M/41.5M [00:00&lt;00:00, 60.2MB/s]
-100%|##########| 41.5M/41.5M [00:00&lt;00:00, 57.5MB/s]
+ 21%|##        | 8.60M/41.5M [00:00&lt;00:00, 90.2MB/s]
+ 41%|####1     | 17.2M/41.5M [00:00&lt;00:00, 68.1MB/s]
+ 58%|#####7    | 24.0M/41.5M [00:00&lt;00:00, 49.5MB/s]
+ 77%|#######7  | 32.0M/41.5M [00:00&lt;00:00, 48.5MB/s]
+ 89%|########9 | 36.9M/41.5M [00:00&lt;00:00, 45.9MB/s]
+100%|#########9| 41.5M/41.5M [00:00&lt;00:00, 43.3MB/s]
+100%|##########| 41.5M/41.5M [00:00&lt;00:00, 48.6MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_pytorch.html b/docs/how_to/compile_models/from_pytorch.html
index eb4245963..068c1088a 100644
--- a/docs/how_to/compile_models/from_pytorch.html
+++ b/docs/how_to/compile_models/from_pytorch.html
@@ -414,9 +414,10 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/resnet18-f37072fd.pth&quot; to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
 
   0%|          | 0.00/44.7M [00:00&lt;?, ?B/s]
- 28%|##7       | 12.4M/44.7M [00:00&lt;00:00, 130MB/s]
- 76%|#######6  | 34.1M/44.7M [00:00&lt;00:00, 187MB/s]
-100%|##########| 44.7M/44.7M [00:00&lt;00:00, 188MB/s]
+  7%|6         | 3.07M/44.7M [00:00&lt;00:01, 32.1MB/s]
+ 14%|#3        | 6.22M/44.7M [00:00&lt;00:01, 32.6MB/s]
+ 66%|######5   | 29.3M/44.7M [00:00&lt;00:00, 128MB/s]
+100%|##########| 44.7M/44.7M [00:00&lt;00:00, 132MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_tensorflow.html b/docs/how_to/compile_models/from_tensorflow.html
index de0647fbf..ac981cf9b 100644
--- a/docs/how_to/compile_models/from_tensorflow.html
+++ b/docs/how_to/compile_models/from_tensorflow.html
@@ -636,7 +636,7 @@ banana (score = 0.00022)
 desk (score = 0.00019)
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  4.983 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  10.418 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-tensorflow-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7f1d3d1b878694c201c614c807cdebc8/from_tensorflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_tensorflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/sg_execution_times.html b/docs/how_to/compile_models/sg_execution_times.html
index ebbe095ba..2fc80562c 100644
--- a/docs/how_to/compile_models/sg_execution_times.html
+++ b/docs/how_to/compile_models/sg_execution_times.html
@@ -327,7 +327,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-compile-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:09.598</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
+<p><strong>05:24.099</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 81%" />
@@ -336,43 +336,43 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></td>
-<td><p>01:04.983</p></td>
+<td><p>01:10.418</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></td>
-<td><p>01:03.774</p></td>
+<td><p>01:06.919</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></td>
-<td><p>00:39.974</p></td>
+<td><p>00:41.947</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></td>
-<td><p>00:28.225</p></td>
+<td><p>00:29.187</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></td>
-<td><p>00:26.516</p></td>
+<td><p>00:25.999</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></td>
-<td><p>00:25.020</p></td>
+<td><p>00:25.898</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></td>
-<td><p>00:22.422</p></td>
+<td><p>00:23.396</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></td>
-<td><p>00:19.619</p></td>
+<td><p>00:20.679</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></td>
-<td><p>00:16.735</p></td>
+<td><p>00:17.132</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></td>
-<td><p>00:02.331</p></td>
+<td><p>00:02.522</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/deploy_models/deploy_model_on_android.html b/docs/how_to/deploy_models/deploy_model_on_android.html
index c589af946..e18589c14 100644
--- a/docs/how_to/deploy_models/deploy_model_on_android.html
+++ b/docs/how_to/deploy_models/deploy_model_on_android.html
@@ -653,7 +653,7 @@ to the remote android device.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  16.0189      15.9961      16.1743      15.8860       0.0982
+  16.5020      16.4444      17.0750      16.3679       0.1983
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
index a0ac67aa2..f6bba62a0 100644
--- a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
+++ b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
@@ -436,15 +436,38 @@ be unstable.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth&quot; to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
 
   0%|          | 0.00/170M [00:00&lt;?, ?B/s]
-  3%|2         | 4.52M/170M [00:00&lt;00:03, 47.4MB/s]
-  9%|8         | 15.1M/170M [00:00&lt;00:01, 84.6MB/s]
- 21%|##1       | 35.9M/170M [00:00&lt;00:00, 145MB/s]
- 36%|###5      | 60.9M/170M [00:00&lt;00:00, 191MB/s]
- 52%|#####1    | 87.6M/170M [00:00&lt;00:00, 223MB/s]
- 70%|######9   | 119M/170M [00:00&lt;00:00, 258MB/s]
- 85%|########5 | 144M/170M [00:00&lt;00:00, 261MB/s]
-100%|#########9| 169M/170M [00:00&lt;00:00, 256MB/s]
-100%|##########| 170M/170M [00:00&lt;00:00, 220MB/s]
+  2%|2         | 3.75M/170M [00:00&lt;00:04, 39.2MB/s]
+  6%|5         | 10.0M/170M [00:00&lt;00:03, 54.6MB/s]
+  9%|8         | 15.2M/170M [00:00&lt;00:03, 47.4MB/s]
+ 12%|#2        | 20.4M/170M [00:00&lt;00:03, 50.1MB/s]
+ 16%|#5        | 26.5M/170M [00:00&lt;00:02, 54.6MB/s]
+ 19%|#8        | 31.8M/170M [00:00&lt;00:02, 51.1MB/s]
+ 22%|##1       | 36.8M/170M [00:00&lt;00:02, 50.7MB/s]
+ 25%|##4       | 41.6M/170M [00:00&lt;00:02, 50.3MB/s]
+ 27%|##7       | 46.5M/170M [00:00&lt;00:02, 49.0MB/s]
+ 31%|###       | 52.4M/170M [00:01&lt;00:02, 52.0MB/s]
+ 35%|###4      | 58.9M/170M [00:01&lt;00:02, 56.8MB/s]
+ 38%|###7      | 64.4M/170M [00:01&lt;00:01, 56.5MB/s]
+ 41%|####1     | 69.8M/170M [00:01&lt;00:01, 56.6MB/s]
+ 44%|####4     | 75.2M/170M [00:01&lt;00:01, 50.6MB/s]
+ 48%|####7     | 81.1M/170M [00:01&lt;00:01, 53.7MB/s]
+ 51%|#####     | 86.4M/170M [00:01&lt;00:01, 46.2MB/s]
+ 54%|#####3    | 91.0M/170M [00:01&lt;00:01, 46.4MB/s]
+ 57%|#####6    | 96.6M/170M [00:01&lt;00:01, 49.3MB/s]
+ 60%|######    | 102M/170M [00:02&lt;00:01, 52.5MB/s]
+ 63%|######3   | 108M/170M [00:02&lt;00:01, 49.9MB/s]
+ 66%|######6   | 112M/170M [00:02&lt;00:01, 47.0MB/s]
+ 69%|######8   | 117M/170M [00:02&lt;00:01, 44.9MB/s]
+ 72%|#######1  | 122M/170M [00:02&lt;00:01, 44.8MB/s]
+ 75%|#######5  | 128M/170M [00:02&lt;00:00, 50.2MB/s]
+ 78%|#######8  | 133M/170M [00:02&lt;00:00, 52.3MB/s]
+ 82%|########2 | 139M/170M [00:02&lt;00:00, 55.6MB/s]
+ 85%|########5 | 145M/170M [00:03&lt;00:00, 48.8MB/s]
+ 89%|########8 | 151M/170M [00:03&lt;00:00, 52.2MB/s]
+ 92%|#########1| 156M/170M [00:03&lt;00:00, 53.2MB/s]
+ 95%|#########5| 161M/170M [00:03&lt;00:00, 43.9MB/s]
+ 98%|#########8| 167M/170M [00:03&lt;00:00, 47.0MB/s]
+100%|##########| 170M/170M [00:03&lt;00:00, 50.0MB/s]
 /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:3878: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
   for i in range(dim)
 /usr/local/lib/python3.7/dist-packages/torchvision/models/detection/anchor_utils.py:127: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the &#39;trunc&#39; function NOT &#39;floor&#39;). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode=&#39;trunc&#39;), or for actual floor division, use torch.div(a, b, rounding_mode=&#39;floor&#39;).
@@ -542,7 +565,7 @@ torchvision rcnn models.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get 9 valid boxes
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  2.729 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  15.342 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-object-detection-pytorch-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7795da4b258c8feff986668b95ef57ad/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized.html b/docs/how_to/deploy_models/deploy_prequantized.html
index cdb54d1d6..cbedb19cd 100644
--- a/docs/how_to/deploy_models/deploy_prequantized.html
+++ b/docs/how_to/deploy_models/deploy_prequantized.html
@@ -480,9 +480,7 @@ training. Other models require a full post training calibration.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://download.pytorch.org/models/mobilenet_v2-b0353104.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
 
   0%|          | 0.00/13.6M [00:00&lt;?, ?B/s]
- 26%|##5       | 3.48M/13.6M [00:00&lt;00:00, 36.2MB/s]
- 63%|######3   | 8.56M/13.6M [00:00&lt;00:00, 46.2MB/s]
-100%|##########| 13.6M/13.6M [00:00&lt;00:00, 52.9MB/s]
+100%|##########| 13.6M/13.6M [00:00&lt;00:00, 190MB/s]
 </pre></div>
 </div>
 </div>
@@ -571,7 +569,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  90.3306      90.2257      92.9293      90.0316       0.3912
+  90.5458      90.3687      96.7163      90.2102       0.8075
 </pre></div>
 </div>
 <div class="admonition note">
@@ -610,7 +608,7 @@ This includes support for the VNNI 8 bit dot product instruction (CascadeLake or
 <div class="section" id="deploy-a-quantized-tflite-model">
 <h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline">¶</a></h2>
 <p>TODO</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  11.458 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  13.587 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized_tflite.html b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
index 0c5d4ca82..e262212a7 100644
--- a/docs/how_to/deploy_models/deploy_prequantized_tflite.html
+++ b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
@@ -573,7 +573,7 @@ TFLite Top-5 labels: [387 102 386 341 349]
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  119.5119     119.5018     120.4973     118.7849      0.3286
+  121.1934     121.0105     129.8010     120.2194      1.0326
 </pre></div>
 </div>
 <div class="admonition note">
@@ -601,7 +601,7 @@ network for ARM CPU</span></a>.</p></li>
 </ul>
 </div></blockquote>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  53.753 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  58.345 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-tflite-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/56691c7a27d45da61d112276334640d3/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_quantized.html b/docs/how_to/deploy_models/deploy_quantized.html
index 029d3a7b2..4f187ca1d 100644
--- a/docs/how_to/deploy_models/deploy_quantized.html
+++ b/docs/how_to/deploy_models/deploy_quantized.html
@@ -509,7 +509,7 @@ for calibration. But the accuracy might be impacted.</p>
   DeprecationWarning,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  28.023 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  27.791 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
index cd289d4bb..f743c08da 100644
--- a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
+++ b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
@@ -441,25 +441,22 @@ to your device.</p>
 Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
 
   0%|          | 0/132723 [00:00&lt;?, ?KB/s]
-  4%|3         | 5272/132723 [00:00&lt;00:02, 52712.89KB/s]
-  9%|9         | 12402/132723 [00:00&lt;00:01, 63641.72KB/s]
- 15%|#4        | 19831/132723 [00:00&lt;00:01, 68500.67KB/s]
- 20%|##        | 26921/132723 [00:00&lt;00:01, 69444.66KB/s]
- 26%|##5       | 34116/132723 [00:00&lt;00:01, 70346.61KB/s]
- 31%|###1      | 41199/132723 [00:00&lt;00:01, 70508.35KB/s]
- 37%|###6      | 48639/132723 [00:00&lt;00:01, 71777.88KB/s]
- 42%|####2     | 55997/132723 [00:00&lt;00:01, 72349.87KB/s]
- 48%|####7     | 63464/132723 [00:00&lt;00:00, 73068.72KB/s]
- 53%|#####3    | 70933/132723 [00:01&lt;00:00, 73566.94KB/s]
- 59%|#####9    | 78320/132723 [00:01&lt;00:00, 73657.29KB/s]
- 65%|######4   | 85819/132723 [00:01&lt;00:00, 74060.66KB/s]
- 70%|#######   | 93325/132723 [00:01&lt;00:00, 74358.88KB/s]
- 76%|#######5  | 100842/132723 [00:01&lt;00:00, 74600.92KB/s]
- 82%|########1 | 108303/132723 [00:01&lt;00:00, 74461.83KB/s]
- 87%|########7 | 115776/132723 [00:01&lt;00:00, 74540.91KB/s]
- 93%|#########2| 123292/132723 [00:01&lt;00:00, 74725.30KB/s]
- 99%|#########8| 130824/132723 [00:01&lt;00:00, 74898.16KB/s]
-100%|##########| 132723/132723 [00:01&lt;00:00, 72673.01KB/s]
+  5%|4         | 6366/132723 [00:00&lt;00:01, 63648.68KB/s]
+ 11%|#1        | 14774/132723 [00:00&lt;00:01, 75663.51KB/s]
+ 17%|#7        | 23165/132723 [00:00&lt;00:01, 79425.04KB/s]
+ 24%|##3       | 31693/132723 [00:00&lt;00:01, 81734.01KB/s]
+ 30%|###       | 40122/132723 [00:00&lt;00:01, 82649.01KB/s]
+ 37%|###6      | 48586/132723 [00:00&lt;00:01, 83318.62KB/s]
+ 43%|####3     | 57153/132723 [00:00&lt;00:00, 84085.24KB/s]
+ 50%|####9     | 65727/132723 [00:00&lt;00:00, 84609.19KB/s]
+ 56%|#####5    | 74294/132723 [00:00&lt;00:00, 84937.90KB/s]
+ 62%|######2   | 82788/132723 [00:01&lt;00:00, 84767.69KB/s]
+ 69%|######8   | 91312/132723 [00:01&lt;00:00, 84907.79KB/s]
+ 75%|#######5  | 99869/132723 [00:01&lt;00:00, 85105.78KB/s]
+ 82%|########1 | 108380/132723 [00:01&lt;00:00, 85069.70KB/s]
+ 88%|########8 | 116937/132723 [00:01&lt;00:00, 85218.20KB/s]
+ 95%|#########4| 125459/132723 [00:01&lt;00:00, 84880.54KB/s]
+100%|##########| 132723/132723 [00:01&lt;00:00, 83491.23KB/s]
 </pre></div>
 </div>
 <p>Create TVM runtime and do inference
@@ -502,7 +499,7 @@ Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from h
 <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  41.715 seconds)</p>
+<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  46.808 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/sg_execution_times.html b/docs/how_to/deploy_models/sg_execution_times.html
index 88115f267..c680046c3 100644
--- a/docs/how_to/deploy_models/sg_execution_times.html
+++ b/docs/how_to/deploy_models/sg_execution_times.html
@@ -327,7 +327,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-deploy-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>11:35.081</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
+<p><strong>12:02.036</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 86%" />
@@ -336,35 +336,35 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></td>
-<td><p>03:02.729</p></td>
+<td><p>03:15.342</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></td>
-<td><p>02:41.715</p></td>
+<td><p>02:46.808</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></td>
-<td><p>01:53.753</p></td>
+<td><p>01:58.345</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></td>
-<td><p>01:28.023</p></td>
+<td><p>01:27.791</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></td>
-<td><p>01:11.458</p></td>
+<td><p>01:13.587</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></td>
-<td><p>00:31.836</p></td>
+<td><p>00:32.485</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_nano.html#sphx-glr-how-to-deploy-models-deploy-model-on-nano-py"><span class="std std-ref">Deploy the Pretrained Model on Jetson Nano</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_nano.py</span></code>)</p></td>
-<td><p>00:22.961</p></td>
+<td><p>00:24.158</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></td>
-<td><p>00:22.598</p></td>
+<td><p>00:23.513</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></td>
diff --git a/docs/how_to/extend_tvm/bring_your_own_datatypes.html b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
index b5a822c53..b6f8bffe1 100644
--- a/docs/how_to/extend_tvm/bring_your_own_datatypes.html
+++ b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
@@ -612,7 +612,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 <span class="n">module</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <span class="n">get_mobilenet</span><span class="p">()</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip09334d33-df49-4c93-aaa6-395497eee699 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipb4573aff-917b-43a6-8528-13d5b714b02e from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 </pre></div>
 </div>
 <p>It’s easy to execute MobileNet with native TVM:</p>
diff --git a/docs/how_to/extend_tvm/sg_execution_times.html b/docs/how_to/extend_tvm/sg_execution_times.html
index 0b1dfa456..9dbd8be06 100644
--- a/docs/how_to/extend_tvm/sg_execution_times.html
+++ b/docs/how_to/extend_tvm/sg_execution_times.html
@@ -327,7 +327,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-extend-tvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:42.138</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
+<p><strong>00:42.814</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -336,15 +336,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></td>
-<td><p>00:38.907</p></td>
+<td><p>00:39.495</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></td>
-<td><p>00:02.235</p></td>
+<td><p>00:02.320</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></td>
-<td><p>00:00.986</p></td>
+<td><p>00:00.991</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></td>
diff --git a/docs/how_to/extend_tvm/use_pass_instrument.html b/docs/how_to/extend_tvm/use_pass_instrument.html
index 9a24e7825..9bb295bf3 100644
--- a/docs/how_to/extend_tvm/use_pass_instrument.html
+++ b/docs/how_to/extend_tvm/use_pass_instrument.html
@@ -512,10 +512,10 @@ profile the execution time of each passes.</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6768us [6768us] (45.72%; 45.72%)
-FoldScaleAxis: 8036us [7us] (54.28%; 54.28%)
-        FoldConstant: 8030us [1670us] (54.24%; 99.92%)
-                InferType: 6360us [6360us] (42.96%; 79.20%)
+InferType: 7457us [7457us] (47.24%; 47.24%)
+FoldScaleAxis: 8330us [7us] (52.76%; 52.76%)
+        FoldConstant: 8323us [1752us] (52.72%; 99.92%)
+                InferType: 6572us [6572us] (41.63%; 78.96%)
 </pre></div>
 </div>
 </div>
@@ -537,10 +537,10 @@ Refer to following sections and <a class="reference internal" href="../../refere
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 6472us [6472us] (44.72%; 44.72%)
-FoldScaleAxis: 8000us [6us] (55.28%; 55.28%)
-        FoldConstant: 7994us [1712us] (55.24%; 99.93%)
-                InferType: 6281us [6281us] (43.41%; 78.58%)
+InferType: 6677us [6677us] (44.76%; 44.76%)
+FoldScaleAxis: 8242us [7us] (55.24%; 55.24%)
+        FoldConstant: 8235us [1721us] (55.20%; 99.92%)
+                InferType: 6514us [6514us] (43.67%; 79.11%)
 </pre></div>
 </div>
 <p>Register empty list to clear existing instruments.</p>
diff --git a/docs/how_to/optimize_operators/opt_conv_cuda.html b/docs/how_to/optimize_operators/opt_conv_cuda.html
index fc35fe7ac..72bb8fbc3 100644
--- a/docs/how_to/optimize_operators/opt_conv_cuda.html
+++ b/docs/how_to/optimize_operators/opt_conv_cuda.html
@@ -564,7 +564,7 @@ latency of convolution.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Convolution: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">*</span> <span cl [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 54.159255 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 54.123106 ms
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-optimize-operators-opt-conv-cuda-py">
diff --git a/docs/how_to/optimize_operators/opt_conv_tensorcore.html b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
index 5a7676160..b37872531 100644
--- a/docs/how_to/optimize_operators/opt_conv_tensorcore.html
+++ b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
@@ -906,7 +906,7 @@ be able to run on our build server</p>
     <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;conv2d with tensor core: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">* [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 6.841903 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 13.381761 ms
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/optimize_operators/opt_gemm.html b/docs/how_to/optimize_operators/opt_gemm.html
index 2f9f7b4f8..f81c3322e 100644
--- a/docs/how_to/optimize_operators/opt_gemm.html
+++ b/docs/how_to/optimize_operators/opt_gemm.html
@@ -461,8 +461,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Baseline: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.019053
-Baseline: 3.484342
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.019442
+Baseline: 3.325164
 </pre></div>
 </div>
 <p>In TVM, we can always inspect lower level IR to debug or optimize our schedule.
@@ -522,7 +522,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt1: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.308756
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.336239
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -589,7 +589,7 @@ vastly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt2: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.350427
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.355516
 </pre></div>
 </div>
 <p>Here is the generated IR after vectorization.</p>
@@ -650,7 +650,7 @@ the access pattern for A matrix is more cache friendly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt3: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.123910
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.135305
 </pre></div>
 </div>
 <p>Here is the generated IR after loop permutation.</p>
@@ -733,7 +733,7 @@ flattening.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt4: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.110596
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.110355
 </pre></div>
 </div>
 <p>Here is the generated IR after array packing.</p>
@@ -819,7 +819,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt5: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.111957
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.112382
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -909,7 +909,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt6: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">opt6_time</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.147962
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.148586
 </pre></div>
 </div>
 <p>Here is the generated IR after parallelization.</p>
diff --git a/docs/how_to/optimize_operators/sg_execution_times.html b/docs/how_to/optimize_operators/sg_execution_times.html
index 9ff004a35..353e189da 100644
--- a/docs/how_to/optimize_operators/sg_execution_times.html
+++ b/docs/how_to/optimize_operators/sg_execution_times.html
@@ -327,7 +327,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-optimize-operators-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:35.177</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
+<p><strong>00:35.477</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -336,15 +336,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></td>
-<td><p>00:32.956</p></td>
+<td><p>00:32.953</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></td>
-<td><p>00:01.228</p></td>
+<td><p>00:01.386</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></td>
-<td><p>00:00.993</p></td>
+<td><p>00:01.138</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
index e279705dd..78d7cd0f9 100644
--- a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
+++ b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
@@ -327,7 +327,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autoscheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>06:21.425</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
+<p><strong>06:37.339</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 85%" />
@@ -336,27 +336,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></td>
-<td><p>03:30.817</p></td>
+<td><p>03:44.426</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></td>
-<td><p>01:24.333</p></td>
+<td><p>01:26.025</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></td>
-<td><p>00:47.837</p></td>
+<td><p>00:48.701</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></td>
-<td><p>00:20.289</p></td>
+<td><p>00:19.544</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></td>
-<td><p>00:09.196</p></td>
+<td><p>00:09.434</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></td>
-<td><p>00:08.953</p></td>
+<td><p>00:09.209</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
index e2b912398..f358bc675 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
@@ -494,11 +494,11 @@ cooperative fetching, unrolling and operator fusion.</p>
              compute: Buffer(compute_2: Pointer(float32), float32, [25088], [])}
   buffer_map = {data_1: data, kernel_1: kernel, bias_1: bias, compute_1: compute}
   preflattened_buffer_map = {data_1: data_3: Buffer(data_2, float32, [1, 512, 7, 7], []), kernel_1: kernel_3: Buffer(kernel_2, float32, [512, 512, 3, 3], []), bias_1: bias_3: Buffer(bias_2, float32, [1, 512, 1, 1], []), compute_1: compute_3: Buffer(compute_2, float32, [1, 512, 7, 7], [])} {
-  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 8;
+  attr [IterVar(blockIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;blockIdx.x&quot;)] &quot;thread_extent&quot; = 28;
   allocate(conv2d_nchw: Pointer(local float32), float32, [14]), storage_scope = local;
-  allocate(pad_temp.shared: Pointer(shared float32), float32, [504]), storage_scope = shared;
-  allocate(kernel.shared: Pointer(shared float32), float32, [1536]), storage_scope = shared;
-  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224 {
+  allocate(pad_temp.shared: Pointer(shared float32), float32, [72]), storage_scope = shared;
+  allocate(kernel.shared: Pointer(shared float32), float32, [3072]), storage_scope = shared;
+  attr [IterVar(threadIdx.x: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64 {
     conv2d_nchw_1: Buffer(conv2d_nchw, float32, [14], [], scope=&quot;local&quot;, align=32)[0] = 0f32
     conv2d_nchw_1[1] = 0f32
     conv2d_nchw_1[2] = 0f32
@@ -515,85 +515,461 @@ cooperative fetching, unrolling and operator fusion.</p>
     conv2d_nchw_1[13] = 0f32
     for (rc.outer.outer: int32, 0, 64) {
       for (ry.outer.outer: int32, 0, 3) {
-        let cse_var_4: int32 = (rc.outer.outer*392)
-        let cse_var_3: int32 = (ry.outer.outer*7)
         let cse_var_2: int32 = (rc.outer.outer*72)
         let cse_var_1: int32 = (ry.outer.outer*3)
          {
-          attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224;
-          pad_temp.shared_1: Buffer(pad_temp.shared, float32, [504], [], scope=&quot;shared&quot;)[threadIdx.x_1] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod(threadIdx.x_1, 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod(threadIdx.x_1, 9))) &amp;&amp; (floormod(threadIdx.x_1, 9) &lt; 8)), data[((((cse_var_4 + (floordiv(threadIdx.x_1, 9)*7)) + cse_var_3) + floormod(threadIdx.x_1, 9)) - 8)], 0f3 [...]
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224;
-          pad_temp.shared_1[(threadIdx.x_1 + 224)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 35), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 8), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 8), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 224), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 8), 9)) - 8)], 0f32, dtype=float32)
-          attr [IterVar(threadIdx.x_1, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224;
-          if @tir.likely((threadIdx.x_1 &lt; 56), dtype=bool) {
-            pad_temp.shared_1[(threadIdx.x_1 + 448)] = @tir.if_then_else(((((1 &lt;= (floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer)) &amp;&amp; ((floordiv(floormod((threadIdx.x_1 + 7), 63), 9) + ry.outer.outer) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1 + 7), 9))) &amp;&amp; (floormod((threadIdx.x_1 + 7), 9) &lt; 8)), data[((((cse_var_4 + (floordiv((threadIdx.x_1 + 448), 9)*7)) + cse_var_3) + floormod((threadIdx.x_1 + 7), 9)) - 8)], 0f32, dtype=float32)
-          }
-          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224;
-          kernel.shared_1: Buffer(kernel.shared, float32, [1536], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[((((((blockIdx.x*294912) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224;
-          kernel.shared_1[(threadIdx.x_2 + 224)] = kernel[((((((blockIdx.x*294912) + (floordiv((threadIdx.x_2 + 224), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224;
-          kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((blockIdx.x*294912) + (floordiv((threadIdx.x_2 + 448), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224;
-          kernel.shared_1[(threadIdx.x_2 + 672)] = kernel[(((((((blockIdx.x*294912) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 129024)]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224;
-          kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((blockIdx.x*294912) + (floordiv((threadIdx.x_2 + 896), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224;
-          kernel.shared_1[(threadIdx.x_2 + 1120)] = kernel[((((((blockIdx.x*294912) + (floordiv((threadIdx.x_2 + 1120), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
-          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 224;
-          if @tir.likely((threadIdx.x_2 &lt; 192), dtype=bool) {
-            kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((((blockIdx.x*294912) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
-          }
-          for (rc.outer.inner: int32, 0, 8) {
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9))]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3))]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 1)]))
-            conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-            conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-            conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-            conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-            conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-            conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-            conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 2)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9))]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 24)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 1)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 25)]))
-            conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 2)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-            conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 3)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-            conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 4)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-            conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 5)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-            conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 6)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-            conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 7)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
-            conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[(((rc.outer.inner*63) + (floormod(threadIdx.x, 7)*9)) + 8)]*kernel.shared_1[(((floordiv(threadIdx.x, 7)*48) + (rc.outer.inner*3)) + 26)]))
+          attr [IterVar(threadIdx.x_1: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64 {
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1: Buffer(pad_temp.shared, float32, [72], [], scope=&quot;shared&quot;)[(threadIdx.x_1*4)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod((threadIdx.x_1*4), 9))) &amp;&amp; (floormod((threadIdx.x_1*4), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv((threadIdx.x_1*4), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) +  [...]
+            }
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1[((threadIdx.x_1*4) + 1)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 1), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 1), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 1), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 1), 9)) - 8)], 0 [...]
+            }
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1[((threadIdx.x_1*4) + 2)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 2), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 2), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 2), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 2), 9)) - 8)], 0 [...]
+            }
+            if @tir.likely((threadIdx.x_1 &lt; 18), dtype=bool) {
+              pad_temp.shared_1[((threadIdx.x_1*4) + 3)] = @tir.if_then_else(((((1 &lt;= (ry.outer.outer + floormod(blockIdx.x, 7))) &amp;&amp; ((ry.outer.outer + floormod(blockIdx.x, 7)) &lt; 8)) &amp;&amp; (1 &lt;= floormod(((threadIdx.x_1*4) + 3), 9))) &amp;&amp; (floormod(((threadIdx.x_1*4) + 3), 9) &lt; 8)), data[((((((rc.outer.outer*392) + (floordiv(((threadIdx.x_1*4) + 3), 9)*49)) + (ry.outer.outer*7)) + (floormod(blockIdx.x, 7)*7)) + floormod(((threadIdx.x_1*4) + 3), 9)) - 8)], 0 [...]
+            }
           }
+          attr [IterVar(threadIdx.x_2: int32, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1: Buffer(kernel.shared, float32, [3072], [], scope=&quot;shared&quot;)[threadIdx.x_2] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 64)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 64), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 128)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 128), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 192)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 36864)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 256)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 256), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 320)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 320), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 384)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 73728)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 448)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 448), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 512)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 512), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 576)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 110592)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 640)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 640), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 704)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 704), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 768)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 147456)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 832)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 832), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 896)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 896), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 960)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 184320)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1024)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1024), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1088)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1088), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1152)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 221184)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1216)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1216), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1280)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1280), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1344)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 258048)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1408)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1408), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1472)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1472), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1536)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 294912)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1600)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1600), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1664)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1664), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1728)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 331776)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1792)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1792), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1856)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1856), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1920)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 368640)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 1984)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 1984), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2048)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2048), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2112)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 405504)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2176)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2176), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2240)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2240), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2304)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 442368)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2368)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2368), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2432)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2432), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2496)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 479232)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2560)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2560), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2624)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2624), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2688)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 516096)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2752)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2752), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2816)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2816), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2880)] = kernel[(((((((floordiv(blockIdx.x, 7)*589824) + (floordiv(threadIdx.x_2, 24)*4608)) + cse_var_2) + (floordiv(floormod(threadIdx.x_2, 24), 3)*9)) + cse_var_1) + floormod(threadIdx.x_2, 3)) + 552960)]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 2944)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 2944), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 16), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 1), 3))]
+          attr [IterVar(threadIdx.x_2, (nullptr), &quot;ThreadIndex&quot;, &quot;threadIdx.x&quot;)] &quot;thread_extent&quot; = 64;
+          kernel.shared_1[(threadIdx.x_2 + 3008)] = kernel[((((((floordiv(blockIdx.x, 7)*589824) + (floordiv((threadIdx.x_2 + 3008), 24)*4608)) + cse_var_2) + (floordiv(floormod((threadIdx.x_2 + 8), 24), 3)*9)) + cse_var_1) + floormod((threadIdx.x_2 + 2), 3))]
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[0]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[1]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[2]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[3]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[4]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[5]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[6]*kernel.shared_1[(threadIdx.x*48)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 3)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[0]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[9]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 24)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 27)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 1)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 4)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[1]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[10]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 25)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 28)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 2)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 5)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[2]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[11]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[3]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[12]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[4]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[13]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[5]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[14]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[6]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[15]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[7]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[16]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[8]*kernel.shared_1[((threadIdx.x*48) + 26)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[17]*kernel.shared_1[((threadIdx.x*48) + 29)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 6)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 9)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[18]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[27]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 30)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 33)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 7)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 10)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[19]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[28]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 31)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 34)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 8)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 11)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[20]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[29]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[21]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[30]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[22]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[31]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[23]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[32]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[24]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[33]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[25]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[34]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[26]*kernel.shared_1[((threadIdx.x*48) + 32)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[35]*kernel.shared_1[((threadIdx.x*48) + 35)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 12)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 15)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[36]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[45]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 36)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 39)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 13)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 16)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[37]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[46]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 37)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 40)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 14)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 17)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[38]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[47]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[39]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[48]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[40]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[49]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[41]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[50]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[42]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[51]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[43]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[52]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[44]*kernel.shared_1[((threadIdx.x*48) + 38)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[53]*kernel.shared_1[((threadIdx.x*48) + 41)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 18)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 21)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[54]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[63]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 42)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 45)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 19)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 22)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[55]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[64]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 43)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 46)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[0] = (conv2d_nchw_1[0] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[1] = (conv2d_nchw_1[1] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[2] = (conv2d_nchw_1[2] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[3] = (conv2d_nchw_1[3] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[4] = (conv2d_nchw_1[4] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[5] = (conv2d_nchw_1[5] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 20)]))
+          conv2d_nchw_1[6] = (conv2d_nchw_1[6] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 23)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[56]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[7] = (conv2d_nchw_1[7] + (pad_temp.shared_1[65]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[57]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[8] = (conv2d_nchw_1[8] + (pad_temp.shared_1[66]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[58]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[9] = (conv2d_nchw_1[9] + (pad_temp.shared_1[67]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[59]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[10] = (conv2d_nchw_1[10] + (pad_temp.shared_1[68]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[60]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[11] = (conv2d_nchw_1[11] + (pad_temp.shared_1[69]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[61]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[12] = (conv2d_nchw_1[12] + (pad_temp.shared_1[70]*kernel.shared_1[((threadIdx.x*48) + 47)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[62]*kernel.shared_1[((threadIdx.x*48) + 44)]))
+          conv2d_nchw_1[13] = (conv2d_nchw_1[13] + (pad_temp.shared_1[71]*kernel.shared_1[((threadIdx.x*48) + 47)]))
         }
       }
     }
     for (i1.inner: int32, 0, 2) {
       for (i3.inner: int32, 0, 7) {
-        compute[(((((blockIdx.x*3136) + (floordiv(threadIdx.x, 7)*98)) + (i1.inner*49)) + (floormod(threadIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((blockIdx.x*64) + (floordiv(threadIdx.x, 7)*2)) + i1.inner)]), 0f32)
+        compute[(((((floordiv(blockIdx.x, 7)*6272) + (threadIdx.x*98)) + (i1.inner*49)) + (floormod(blockIdx.x, 7)*7)) + i3.inner)] = max((conv2d_nchw_1[((i1.inner*7) + i3.inner)] + bias[(((floordiv(blockIdx.x, 7)*128) + (threadIdx.x*2)) + i1.inner)]), 0f32)
       }
     }
   }
@@ -631,7 +1007,7 @@ cooperative fetching, unrolling and operator fusion.</p>
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.355 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.356 ms
 </pre></div>
 </div>
 </div>
@@ -662,31 +1038,31 @@ conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_
 conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
 conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
 conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
-conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=32)
+conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
 conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
 conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
 conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
+conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
 conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
-conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=7)
-conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
+conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
+conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
 conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
 conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
-conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=1)
-conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=8)
+conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
+conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
 conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
 conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
-conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=3)
-conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
+conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
+conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
 s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2d_nc [...]
 compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
 compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
 compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
 compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
-compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=32)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
 compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
 compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
-compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
+compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
 compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
 compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
 compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
@@ -709,14 +1085,14 @@ s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=224)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
 s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
 pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
 s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=224)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
 s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
-s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 64)
+s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 512)
 s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
 
 CUDA source code:
@@ -734,10 +1110,10 @@ CUDA source code:
   #define int64_t long long
   #define uint64_t unsigned long long
 #endif
-extern &quot;C&quot; __global__ void __launch_bounds__(224) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+extern &quot;C&quot; __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
   float conv2d_nchw[14];
-  __shared__ float pad_temp_shared[504];
-  __shared__ float kernel_shared[1536];
+  __shared__ float pad_temp_shared[72];
+  __shared__ float kernel_shared[3072];
   conv2d_nchw[0] = 0.000000e+00f;
   conv2d_nchw[1] = 0.000000e+00f;
   conv2d_nchw[2] = 0.000000e+00f;
@@ -755,70 +1131,408 @@ extern &quot;C&quot; __global__ void __launch_bounds__(224) default_function_ker
   for (int rc_outer_outer = 0; rc_outer_outer &lt; 64; ++rc_outer_outer) {
     for (int ry_outer_outer = 0; ry_outer_outer &lt; 3; ++ry_outer_outer) {
       __syncthreads();
-      pad_temp_shared[((int)threadIdx.x)] = (((((1 &lt;= (((((int)threadIdx.x) % 63) / 9) + ry_outer_outer)) &amp;&amp; ((((((int)threadIdx.x) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= (((int)threadIdx.x) % 9))) &amp;&amp; ((((int)threadIdx.x) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 392) + ((((int)threadIdx.x) / 9) * 7)) + (ry_outer_outer * 7)) + (((int)threadIdx.x) % 9)) - 8)] : 0.000000e+00f);
-      pad_temp_shared[(((int)threadIdx.x) + 224)] = (((((1 &lt;= ((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer)) &amp;&amp; (((((((int)threadIdx.x) + 35) % 63) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 8) % 9))) &amp;&amp; (((((int)threadIdx.x) + 8) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 224) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 8) % 9)) - 8)] : 0.000000e+00f);
-      if (((int)threadIdx.x) &lt; 56) {
-        pad_temp_shared[(((int)threadIdx.x) + 448)] = (((((1 &lt;= (((((int)threadIdx.x) + 7) / 9) + ry_outer_outer)) &amp;&amp; ((((((int)threadIdx.x) + 7) / 9) + ry_outer_outer) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) + 7) % 9))) &amp;&amp; (((((int)threadIdx.x) + 7) % 9) &lt; 8)) ? data[(((((rc_outer_outer * 392) + (((((int)threadIdx.x) + 448) / 9) * 7)) + (ry_outer_outer * 7)) + ((((int)threadIdx.x) + 7) % 9)) - 8)] : 0.000000e+00f);
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) * 4) % 9))) &amp;&amp; (((((int)threadIdx.x) * 4) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
       }
-      kernel_shared[((int)threadIdx.x)] = kernel[((((((((int)blockIdx.x) * 294912) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 224)] = kernel[((((((((int)blockIdx.x) * 294912) + (((((int)threadIdx.x) + 224) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 448)] = kernel[((((((((int)blockIdx.x) * 294912) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 672)] = kernel[(((((((((int)blockIdx.x) * 294912) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 129024)];
-      kernel_shared[(((int)threadIdx.x) + 896)] = kernel[((((((((int)blockIdx.x) * 294912) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1120)] = kernel[((((((((int)blockIdx.x) * 294912) + (((((int)threadIdx.x) + 1120) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      if (((int)threadIdx.x) &lt; 192) {
-        kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[(((((((((int)blockIdx.x) * 294912) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 1) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 1) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
       }
-      __syncthreads();
-      for (int rc_outer_inner = 0; rc_outer_inner &lt; 8; ++rc_outer_inner) {
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9))] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[(((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3))]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 1)]));
-        conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-        conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-        conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-        conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-        conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-        conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-        conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 2)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9))] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 24)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 1)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 25)]));
-        conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 2)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-        conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 3)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-        conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 4)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-        conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 5)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-        conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 6)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-        conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 7)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
-        conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[(((rc_outer_inner * 63) + ((((int)threadIdx.x) % 7) * 9)) + 8)] * kernel_shared[((((((int)threadIdx.x) / 7) * 48) + (rc_outer_inner * 3)) + 26)]));
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 2) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 2) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
       }
+      if (((int)threadIdx.x) &lt; 18) {
+        pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 3) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 3) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
+      }
+      kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
+      kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
+      kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
+      kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
+      kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
+      kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
+      kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
+      kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
+      kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
+      kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
+      kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
+      kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
+      kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
+      kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
+      kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
+      kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
+      kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
+      __syncthreads();
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
+      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
+      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
     }
   }
   for (int i1_inner = 0; i1_inner &lt; 2; ++i1_inner) {
     for (int i3_inner = 0; i3_inner &lt; 7; ++i3_inner) {
-      compute[(((((((int)blockIdx.x) * 3136) + ((((int)threadIdx.x) / 7) * 98)) + (i1_inner * 49)) + ((((int)threadIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[(((((int)blockIdx.x) * 64) + ((((int)threadIdx.x) / 7) * 2)) + i1_inner)]), 0.000000e+00f);
+      compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
     }
   }
 }
@@ -856,7 +1570,7 @@ In the example below we resume the status and do more 5 trials.</p>
 Get devices for measurement successfully!
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  30.817 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  44.426 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e3e540f3b477c0c52d8eb73e674e8ffd/tune_conv2d_layer_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_conv2d_layer_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
index fde2b8c4a..1b5a61238 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
@@ -906,7 +906,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-   9.7631       9.7916       9.8254       9.6723       0.0657
+   9.7732       9.7991       9.8094       9.7111       0.0441
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
index 8bddec123..d90394728 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
@@ -925,7 +925,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  766.9692     765.5452     770.3598     765.0026      2.4077
+  772.5422     772.5742     772.6085     772.4439      0.0709
 </pre></div>
 </div>
 </div>
@@ -947,7 +947,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
 with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>.</p></li>
 </ol>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  24.333 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  26.025 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-x86-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e416b94ca1090b0897c0f6e0df95b911/tune_network_x86.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_x86.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
index d9a6807c4..cc9678de4 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
@@ -625,29 +625,30 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
              placeholder_4: Buffer(placeholder_14: Pointer(float32), float32, [65536], []),
              compute: Buffer(compute_2: Pointer(float32), float32, [65536], [])}
   buffer_map = {placeholder_5: placeholder, placeholder_6: placeholder_1, placeholder_7: placeholder_2, placeholder_8: placeholder_3, placeholder_9: placeholder_4, compute_1: compute}
-  preflattened_buffer_map = {compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_6: placeholder_15: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_9: placeholder_16: Buffer(placeholder_14, float32, [128, 512], []), placeholder_8: placeholder_17: Buffer(placeholder_13, int32, [33], []), placeholder_5: placeholder_18: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_19: Buffer(placeholder_12, int32, [4916], [])} {
-  for (i0.outer.i1.outer.fused: int32, 0, 32) &quot;parallel&quot; {
-    allocate(compute_4: Pointer(global float32), float32, [2048]), storage_scope = global {
-      for (i.outer.inner: int32, 0, 64) {
-        for (i.inner.init: int32, 0, 2) {
-          for (j.init: int32, 0, 16) {
-            compute_5: Buffer(compute_4, float32, [2048], [])[(((i.outer.inner*32) + (i.inner.init*16)) + j.init)] = 0f32
+  preflattened_buffer_map = {placeholder_6: placeholder_15: Buffer(placeholder_11, float32, [4916, 16, 1], []), placeholder_5: placeholder_16: Buffer(placeholder_10, float32, [128, 256], []), placeholder_7: placeholder_17: Buffer(placeholder_12, int32, [4916], []), placeholder_8: placeholder_18: Buffer(placeholder_13, int32, [33], []), compute_1: compute_3: Buffer(compute_2, float32, [128, 512], []), placeholder_9: placeholder_19: Buffer(placeholder_14, float32, [128, 512], [])} {
+  for (i0.outer.i1.outer.fused: int32, 0, 16) &quot;parallel&quot; {
+    allocate(compute_4: Pointer(global float32), float32, [4096]), storage_scope = global {
+      for (i.outer.inner: int32, 0, 8) {
+        for (nb_j.inner: int32, 0, 2) {
+          for (i.inner.init: int32, 0, 16) {
+            for (j.init: int32, 0, 16) {
+              compute_5: Buffer(compute_4, float32, [4096], [])[((((i.outer.inner*512) + (i.inner.init*32)) + (nb_j.inner*16)) + j.init)] = 0f32
+            }
           }
-        }
-        for (elem_idx: int32, 0, (placeholder_3[(i0.outer.i1.outer.fused + 1)] - placeholder_3[i0.outer.i1.outer.fused])) {
-          for (i.inner: int32, 0, 2) {
-            for (j: int32, 0, 16) {
-              if @tir.likely((elem_idx &lt; (placeholder_3[(i0.outer.i1.outer.fused + 1)] - placeholder_3[i0.outer.i1.outer.fused])), dtype=bool) {
-                let cse_var_1: int32 = (((i.outer.inner*32) + (i.inner*16)) + j)
-                compute_5[cse_var_1] = (compute_5[cse_var_1] + (placeholder_1[(((placeholder_3[i0.outer.i1.outer.fused]*16) + (elem_idx*16)) + j)]*max(placeholder[(((i.outer.inner*512) + (i.inner*256)) + placeholder_2[(placeholder_3[i0.outer.i1.outer.fused] + elem_idx)])], 0f32)))
+          for (elem_idx: int32, 0, let cse_var_1: int32 = ((i0.outer.i1.outer.fused*2) + nb_j.inner) in (placeholder_3[(cse_var_1 + 1)] - placeholder_3[cse_var_1])) {
+            for (i.inner: int32, 0, 16) {
+              for (j: int32, 0, 16) {
+                let cse_var_3: int32 = ((i0.outer.i1.outer.fused*2) + nb_j.inner)
+                let cse_var_2: int32 = ((((i.outer.inner*512) + (i.inner*32)) + (nb_j.inner*16)) + j)
+                compute_5[cse_var_2] = (compute_5[cse_var_2] + (placeholder_1[(((placeholder_3[cse_var_3]*16) + (elem_idx*16)) + j)]*max(placeholder[(((i.outer.inner*4096) + (i.inner*256)) + placeholder_2[(placeholder_3[cse_var_3] + elem_idx)])], 0f32)))
               }
             }
           }
         }
       }
       for (i0.inner: int32, 0, 128) {
-        let cse_var_2: int32 = ((i0.inner*512) + (i0.outer.i1.outer.fused*16))
-        compute[ramp(cse_var_2, 1, 16)] = max((compute_5[ramp((i0.inner*16), 1, 16)] + placeholder_4[ramp(cse_var_2, 1, 16)]), broadcast(0f32, 16))
+        let cse_var_4: int32 = ((i0.inner*512) + (i0.outer.i1.outer.fused*32))
+        compute[ramp(cse_var_4, 1, 32)] = max((compute_5[ramp((i0.inner*32), 1, 32)] + placeholder_4[ramp(cse_var_4, 1, 32)]), broadcast(0f32, 32))
       }
     }
   }
@@ -685,7 +686,7 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 2.156 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.503 ms
 </pre></div>
 </div>
 <div class="admonition note">
diff --git a/docs/how_to/tune_with_autotvm/sg_execution_times.html b/docs/how_to/tune_with_autotvm/sg_execution_times.html
index c7c9faf14..e3e76eb59 100644
--- a/docs/how_to/tune_with_autotvm/sg_execution_times.html
+++ b/docs/how_to/tune_with_autotvm/sg_execution_times.html
@@ -327,7 +327,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autotvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:46.109</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
+<p><strong>00:46.556</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -336,11 +336,11 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></td>
-<td><p>00:46.073</p></td>
+<td><p>00:46.518</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_relay_x86.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-x86-py"><span class="std std-ref">Auto-tuning a Convolutional Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_x86.py</span></code>)</p></td>
-<td><p>00:00.021</p></td>
+<td><p>00:00.022</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a Convolutional Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></td>
diff --git a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
index c801f4da4..5606b8bbe 100644
--- a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
+++ b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
@@ -1436,8 +1436,8 @@ No: 8   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
 TimeoutError
 
         [(&#39;tile_f&#39;, [-1, 2, 1, 64]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 1, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4909501
-No: 9   GFLOPS: 187.30/187.30   result: MeasureResult(costs=(0.0012359777555555557,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.039618492126465, timestamp=1662746351.936573)        [(&#39;tile_f&#39;, [-1, 1, 4, 8]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,5072689
-No: 10  GFLOPS: 0.00/187.30     result: Traceback (most recent call last):
+No: 9   GFLOPS: 80.79/80.79     result: MeasureResult(costs=(0.002865295857142857,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7858080863952637, timestamp=1662756943.1289473)       [(&#39;tile_f&#39;, [-1, 1, 4, 8]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,5072689
+No: 10  GFLOPS: 0.00/80.79      result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1560,8 +1560,8 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 4, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 64, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,5092711
-No: 11  GFLOPS: 260.82/260.82   result: MeasureResult(costs=(0.000887597729281768,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.792834997177124, timestamp=1662746352.852904) [(&#39;tile_f&#39;, [-1, 8, 2, 1]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 2, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4264713
-No: 12  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+No: 11  GFLOPS: 258.82/258.82   result: MeasureResult(costs=(0.0008944500614525141,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8252346515655518, timestamp=1662756944.1135225)      [(&#39;tile_f&#39;, [-1, 8, 2, 1]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 2, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4264713
+No: 12  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1684,7 +1684,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 128, 1, 2]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 256]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,183542
-No: 13  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+No: 13  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1807,7 +1807,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 8, 8]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 64]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2482196
-No: 14  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+No: 14  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -1930,9 +1930,9 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 1, 4]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,10306226
-No: 15  GFLOPS: 5.36/260.82     result: MeasureResult(costs=(0.04319207975,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8189799785614014, timestamp=1662746357.4339721)      [(&#39;tile_f&#39;, [-1, 2, 2, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 8]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5330964
-No: 16  GFLOPS: 3.36/260.82     result: MeasureResult(costs=(0.0689593425,), error_no=MeasureErrorNo.NO_ERROR, all_cost=4.5808632373809814, timestamp=1662746358.669979)        [(&#39;tile_f&#39;, [-1, 8, 4, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2140058
-No: 17  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+No: 15  GFLOPS: 5.29/258.82     result: MeasureResult(costs=(0.043732662750000005,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8977558612823486, timestamp=1662756948.8730106)       [(&#39;tile_f&#39;, [-1, 2, 2, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 8]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5330964
+No: 16  GFLOPS: 3.35/258.82     result: MeasureResult(costs=(0.06900273625,), error_no=MeasureErrorNo.NO_ERROR, all_cost=4.738688230514526, timestamp=1662756950.1517107)       [(&#39;tile_f&#39;, [-1, 8, 4, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2140058
+No: 17  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 142, in build
     res = future.result()
   File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 435, in result
@@ -1950,8 +1950,8 @@ No: 17  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
 TimeoutError
 
         [(&#39;tile_f&#39;, [-1, 2, 2, 1]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 16]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,10195251
-No: 18  GFLOPS: 27.03/260.82    result: MeasureResult(costs=(0.0085650848125,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.3159880638122559, timestamp=1662746369.7579346)    [(&#39;tile_f&#39;, [-1, 4, 8, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6068603
-No: 19  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+No: 18  GFLOPS: 26.30/258.82    result: MeasureResult(costs=(0.008800986583333333,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.159686803817749, timestamp=1662756961.0743973)        [(&#39;tile_f&#39;, [-1, 4, 8, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6068603
+No: 19  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2074,7 +2074,7 @@ Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 871, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
 tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 16, 4, 8]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6956993
-No: 20  GFLOPS: 0.00/260.82     result: Traceback (most recent call last):
+No: 20  GFLOPS: 0.00/258.82     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 588, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 540, in _build_func_common
@@ -2237,7 +2237,7 @@ and measure running time.</p>
 Best config:
 [(&#39;tile_f&#39;, [-1, 8, 2, 1]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 2, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4264713
 Finish loading 20 records
-Time cost of this operator: 0.001269
+Time cost of this operator: 0.001225
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autotvm-tune-conv2d-cuda-py">
diff --git a/docs/how_to/work_with_microtvm/micro_autotune.html b/docs/how_to/work_with_microtvm/micro_autotune.html
index 308f1344e..d92d49b4e 100644
--- a/docs/how_to/work_with_microtvm/micro_autotune.html
+++ b/docs/how_to/work_with_microtvm/micro_autotune.html
@@ -584,10 +584,10 @@ the tuned operator.</p>
 ########## Build without Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  311.3     98.725   (1, 2, 10, 10, 3)  2       1        [311.3]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.052     0.968    (1, 6, 10, 10)     1       1        [3.052]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.969     0.307    (1, 1, 10, 10, 3)  1       1        [0.969]
-Total_time                                    -                                             315.32    -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  312.2     98.714   (1, 2, 10, 10, 3)  2       1        [312.2]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.112     0.984    (1, 6, 10, 10)     1       1        [3.112]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.956     0.302    (1, 1, 10, 10, 3)  1       1        [0.956]
+Total_time                                    -                                             316.268   -        -                  -       -        -
 </pre></div>
 </div>
 </div>
@@ -640,10 +640,10 @@ Total_time                                    -
 ########## Build with Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  216.9     98.598   (1, 1, 10, 10, 6)  2       1        [216.9]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       2.242     1.019    (1, 6, 10, 10)     1       1        [2.242]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.842     0.383    (1, 3, 10, 10, 1)  1       1        [0.842]
-Total_time                                    -                                             219.983   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  123.1     97.808   (1, 6, 10, 10, 1)  2       1        [123.1]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.777     1.412    (1, 6, 10, 10)     1       1        [1.777]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.981     0.78     (1, 1, 10, 10, 3)  1       1        [0.981]
+Total_time                                    -                                             125.858   -        -                  -       -        -
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-autotune-py">
diff --git a/docs/how_to/work_with_microtvm/micro_train.html b/docs/how_to/work_with_microtvm/micro_train.html
index 1dfe7509a..3f991986e 100644
--- a/docs/how_to/work_with_microtvm/micro_train.html
+++ b/docs/how_to/work_with_microtvm/micro_train.html
@@ -516,7 +516,7 @@ take about <strong>2 minutes</strong> to download the Stanford Cars, while COCO
 <a href="https://docs.python.org/3/library/shutil.html#shutil.move" title="shutil.move" class="sphx-glr-backref-module-shutil sphx-glr-backref-type-py-function"><span class="n">shutil</span><span class="o">.</span><span class="n">move</span></a><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-typ [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmpf74f6dwp/images/random&#39;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmp21psypts/images/random&#39;
 </pre></div>
 </div>
 </div>
@@ -576,8 +576,8 @@ objects to other stuff? We can display some examples from our datasets using <co
     <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s2">&quot;off&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmpf74f6dwp/images/target contains 8144 images
-/tmp/tmpf74f6dwp/images/random contains 5000 images
+<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[1.0, 0.0], [1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [1.0, 0.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmp21psypts/images/target contains 8144 images
+/tmp/tmp21psypts/images/random contains 5000 images
 </pre></div>
 </div>
 </div>
@@ -689,13 +689,13 @@ the time on our validation set).</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Epoch 1/3
-328/328 - 47s - loss: 0.2253 - accuracy: 0.9230 - val_loss: 0.1339 - val_accuracy: 0.9607 - 47s/epoch - 144ms/step
+328/328 - 49s - loss: 0.2119 - accuracy: 0.9272 - val_loss: 0.1349 - val_accuracy: 0.9558 - 49s/epoch - 148ms/step
 Epoch 2/3
-328/328 - 44s - loss: 0.1040 - accuracy: 0.9599 - val_loss: 0.1150 - val_accuracy: 0.9660 - 44s/epoch - 134ms/step
+328/328 - 44s - loss: 0.0957 - accuracy: 0.9631 - val_loss: 0.1137 - val_accuracy: 0.9600 - 44s/epoch - 135ms/step
 Epoch 3/3
-328/328 - 44s - loss: 0.0688 - accuracy: 0.9748 - val_loss: 0.1372 - val_accuracy: 0.9524 - 44s/epoch - 133ms/step
+328/328 - 44s - loss: 0.0609 - accuracy: 0.9766 - val_loss: 0.1349 - val_accuracy: 0.9543 - 44s/epoch - 134ms/step
 
-&lt;keras.callbacks.History object at 0x7fefc7708990&gt;
+&lt;keras.callbacks.History object at 0x7f72f597eb90&gt;
 </pre></div>
 </div>
 </div>
@@ -961,7 +961,7 @@ as intended.</p>
 <p>From here, we could modify the model to read live images from the camera - we have another
 Arduino tutorial for how to do that <a class="reference external" href="https://github.com/guberti/tvm-arduino-demos/tree/master/examples/person_detection">on GitHub</a>. Alternatively, we could also
 <a class="reference external" href="https://tvm.apache.org/docs/how_to/work_with_microtvm/micro_autotune.html">use TVM’s autotuning capabilities</a> to dramatically improve the model’s performance.</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  37.531 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  40.190 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-train-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/b52cec46baf4f78d6bcd94cbe269c8a6/micro_train.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_train.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/sg_execution_times.html b/docs/how_to/work_with_microtvm/sg_execution_times.html
index 8f403a577..6ac0eccb9 100644
--- a/docs/how_to/work_with_microtvm/sg_execution_times.html
+++ b/docs/how_to/work_with_microtvm/sg_execution_times.html
@@ -327,7 +327,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-microtvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>05:32.139</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
+<p><strong>05:37.256</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -336,19 +336,19 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_train.html#sphx-glr-how-to-work-with-microtvm-micro-train-py"><span class="std std-ref">Training Vision Models for microTVM on Arduino</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_train.py</span></code>)</p></td>
-<td><p>04:37.531</p></td>
+<td><p>04:40.190</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_autotune.html#sphx-glr-how-to-work-with-microtvm-micro-autotune-py"><span class="std std-ref">Autotuning with microTVM</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_autotune.py</span></code>)</p></td>
-<td><p>00:42.954</p></td>
+<td><p>00:44.632</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_aot.html#sphx-glr-how-to-work-with-microtvm-micro-aot-py"><span class="std std-ref">microTVM Host-Driven AoT</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_aot.py</span></code>)</p></td>
-<td><p>00:08.291</p></td>
+<td><p>00:08.899</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_tflite.html#sphx-glr-how-to-work-with-microtvm-micro-tflite-py"><span class="std std-ref">microTVM with TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tflite.py</span></code>)</p></td>
-<td><p>00:03.361</p></td>
+<td><p>00:03.533</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_ethosu.html#sphx-glr-how-to-work-with-microtvm-micro-ethosu-py"><span class="std std-ref">Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_ethosu.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_relay/sg_execution_times.html b/docs/how_to/work_with_relay/sg_execution_times.html
index 45ace0f9d..fb89b8f57 100644
--- a/docs/how_to/work_with_relay/sg_execution_times.html
+++ b/docs/how_to/work_with_relay/sg_execution_times.html
@@ -327,7 +327,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-relay-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:43.662</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
+<p><strong>00:42.933</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -336,15 +336,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="using_pipeline_executor.html#sphx-glr-how-to-work-with-relay-using-pipeline-executor-py"><span class="std std-ref">Using Pipeline Executor in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_pipeline_executor.py</span></code>)</p></td>
-<td><p>00:32.334</p></td>
+<td><p>00:33.124</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="using_external_lib.html#sphx-glr-how-to-work-with-relay-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></td>
-<td><p>00:09.810</p></td>
+<td><p>00:08.427</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="build_gcn.html#sphx-glr-how-to-work-with-relay-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></td>
-<td><p>00:01.511</p></td>
+<td><p>00:01.375</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="using_relay_viz.html#sphx-glr-how-to-work-with-relay-using-relay-viz-py"><span class="std std-ref">Use Relay Visualizer to Visualize Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_relay_viz.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/intrin_math.html b/docs/how_to/work_with_schedules/intrin_math.html
index aa9f5021a..8a8366424 100644
--- a/docs/how_to/work_with_schedules/intrin_math.html
+++ b/docs/how_to/work_with_schedules/intrin_math.html
@@ -522,7 +522,7 @@ The following example customizes CUDA lowering rule for <code class="code docuti
 <a href="../../reference/api/python/ir.html#tvm.ir.register_intrin_lowering" title="tvm.ir.register_intrin_lowering" class="sphx-glr-backref-module-tvm-ir sphx-glr-backref-type-py-function"><span class="n">register_intrin_lowering</span></a><span class="p">(</span><span class="s2">&quot;tir.exp&quot;</span><span class="p">,</span> <span class="n">target</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">f</span><span class="o">= [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7fefac70d9e0&gt;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f72cae55b90&gt;
 </pre></div>
 </div>
 <p>Register the rule to TVM with override option to override existing rule.
diff --git a/docs/how_to/work_with_schedules/sg_execution_times.html b/docs/how_to/work_with_schedules/sg_execution_times.html
index f3f7de696..f706c29c5 100644
--- a/docs/how_to/work_with_schedules/sg_execution_times.html
+++ b/docs/how_to/work_with_schedules/sg_execution_times.html
@@ -327,7 +327,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-schedules-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:04.233</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
+<p><strong>00:08.550</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -336,27 +336,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="intrin_math.html#sphx-glr-how-to-work-with-schedules-intrin-math-py"><span class="std std-ref">Intrinsics and Math Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">intrin_math.py</span></code>)</p></td>
-<td><p>00:01.981</p></td>
+<td><p>00:06.205</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tensorize.html#sphx-glr-how-to-work-with-schedules-tensorize-py"><span class="std std-ref">Use Tensorize to Leverage Hardware Intrinsics</span></a> (<code class="docutils literal notranslate"><span class="pre">tensorize.py</span></code>)</p></td>
-<td><p>00:00.970</p></td>
+<td><p>00:01.042</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="reduction.html#sphx-glr-how-to-work-with-schedules-reduction-py"><span class="std std-ref">Reduction</span></a> (<code class="docutils literal notranslate"><span class="pre">reduction.py</span></code>)</p></td>
-<td><p>00:00.557</p></td>
+<td><p>00:00.568</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="scan.html#sphx-glr-how-to-work-with-schedules-scan-py"><span class="std std-ref">Scan and Recurrent Kernel</span></a> (<code class="docutils literal notranslate"><span class="pre">scan.py</span></code>)</p></td>
-<td><p>00:00.538</p></td>
+<td><p>00:00.543</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="extern_op.html#sphx-glr-how-to-work-with-schedules-extern-op-py"><span class="std std-ref">External Tensor Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">extern_op.py</span></code>)</p></td>
-<td><p>00:00.103</p></td>
+<td><p>00:00.104</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="schedule_primitives.html#sphx-glr-how-to-work-with-schedules-schedule-primitives-py"><span class="std std-ref">Schedule Primitives in TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">schedule_primitives.py</span></code>)</p></td>
-<td><p>00:00.042</p></td>
+<td><p>00:00.045</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tedd.html#sphx-glr-how-to-work-with-schedules-tedd-py"><span class="std std-ref">Use Tensor Expression Debug Display (TEDD) for Visualization</span></a> (<code class="docutils literal notranslate"><span class="pre">tedd.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/tensorize.html b/docs/how_to/work_with_schedules/tensorize.html
index ab7f63345..bf52092f6 100644
--- a/docs/how_to/work_with_schedules/tensorize.html
+++ b/docs/how_to/work_with_schedules/tensorize.html
@@ -577,7 +577,7 @@ The importing needs to happen before the tensorized GEMV being executed.</p>
              C: Buffer(C_2: Pointer(float32), float32, [524288], [])}
   buffer_map = {A_1: A, B_1: B, C_1: C}
   preflattened_buffer_map = {A_1: A_3: Buffer(A_2, float32, [1024, 64], []), B_1: B_3: Buffer(B_2, float32, [512, 64], []), C_1: C_3: Buffer(C_2, float32, [1024, 512], [])} {
-  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmpkl4so9ff/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpkl4so9ff/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
+  attr [IterVar(i: int32, (nullptr), &quot;DataPar&quot;, &quot;&quot;)] &quot;pragma_import_llvm&quot; = &quot;; ModuleID = &#39;/tmp/tmp5hvqdeql/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmp5hvqdeql/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = allo [...]
   for (i, 0, 1024) {
     for (j.outer: int32, 0, 32) {
       @tir.call_extern(&quot;gemv_update&quot;, @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), C_2, ((i*512) + (j.outer*16)), 16, 2, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), A_2, (i*64), 64, 1, dtype=handle), @tir.tvm_access_ptr(@tir.type_annotation(, dtype=float32), B_2, (j.outer*1024), 1024, 1, dtype=handle), 16, 64, 64, dtype=int32)
diff --git a/docs/install/nnpack.html b/docs/install/nnpack.html
index aa2238b85..3153785d7 100644
--- a/docs/install/nnpack.html
+++ b/docs/install/nnpack.html
@@ -224,17 +224,7 @@
               <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
 <ul class="current">
 <li class="toctree-l1 current"><a class="reference internal" href="index.html">Installing TVM</a><ul class="current">
-<li class="toctree-l2 current"><a class="reference internal" href="from_source.html">Install from Source</a><ul class="current">
-<li class="toctree-l3"><a class="reference internal" href="from_source.html#developers-get-source-from-github">Developers: Get Source from Github</a></li>
-<li class="toctree-l3"><a class="reference internal" href="from_source.html#build-the-shared-library">Build the Shared Library</a></li>
-<li class="toctree-l3"><a class="reference internal" href="from_source.html#python-package-installation">Python Package Installation</a></li>
-<li class="toctree-l3 current"><a class="reference internal" href="from_source.html#install-contrib-libraries">Install Contrib Libraries</a><ul class="current">
-<li class="toctree-l4 current"><a class="current reference internal" href="#">NNPACK Contrib Installation</a></li>
-</ul>
-</li>
-<li class="toctree-l3"><a class="reference internal" href="from_source.html#enable-c-tests">Enable C++ Tests</a></li>
-</ul>
-</li>
+<li class="toctree-l2"><a class="reference internal" href="from_source.html">Install from Source</a></li>
 <li class="toctree-l2"><a class="reference internal" href="docker.html">Docker Images</a></li>
 <li class="toctree-l2 current"><a class="current reference internal" href="#">NNPACK Contrib Installation</a><ul>
 <li class="toctree-l3"><a class="reference internal" href="#conditions">Conditions</a></li>
diff --git a/docs/reference/api/doxygen/_2workspace_2include_2tvm_2script_2ir_builder_2base_8h-example.html b/docs/reference/api/doxygen/_2workspace_2include_2tvm_2script_2ir_builder_2base_8h-example.html
index 78f801f77..05b85770c 100644
--- a/docs/reference/api/doxygen/_2workspace_2include_2tvm_2script_2ir_builder_2base_8h-example.html
+++ b/docs/reference/api/doxygen/_2workspace_2include_2tvm_2script_2ir_builder_2base_8h-example.html
@@ -63,8 +63,8 @@ $(function() {
 </div><!--header-->
 <div class="contents">
 <p>A stack frame of the IRBuilder used to keep track of the current scope. Furthermore, the information stored in each stack frame can be useful for context-dependent IR construction.The <code>T::MatchBuffer</code> below adds an element in <code>PrimFuncNode::buffer_map</code>:</p>
-<div class="fragment"><div class="line"><span class="keyword">using</span> T = tvm::script::ir_builder::tir;</div><div class="line">With &lt;PrimFuncFrame&gt; _(...);</div><div class="line">Buffer buffer = T::MatchBuffer(...);</div></div><!-- fragment --><p>The <code>T::MatchBuffer</code> below instead generates <code>MatchBufferRegion</code> in a TIR block:</p>
-<div class="fragment"><div class="line"><span class="keyword">using</span> T = tvm::script::ir_builder::tir;</div><div class="line">With &lt;PrimFuncFrame&gt; _(...);</div><div class="line">{</div><div class="line">  With&lt;BlockFrame&gt; _2(...);</div><div class="line">  Buffer buffer = T::MatchBuffer(...);</div><div class="line">}</div></div><!-- fragment --><div class="fragment"><div class="line"><span class="comment">/*</span></div><div class="line"><span class="comment"> * Licensed [...]
+<div class="fragment"><div class="line"><span class="keyword">using</span> T = <a class="code" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>;</div><div class="line">With &lt;PrimFuncFrame&gt; _(...);</div><div class="line">Buffer buffer = T::MatchBuffer(...);</div></div><!-- fragment --><p>The <code>T::MatchBuffer</code> below instead generates <code>MatchBufferRegion</code> in a TIR block:</p>
+<div class="fragment"><div class="line"><span class="keyword">using</span> T = <a class="code" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>;</div><div class="line">With &lt;PrimFuncFrame&gt; _(...);</div><div class="line">{</div><div class="line">  With&lt;BlockFrame&gt; _2(...);</div><div class="line">  Buffer buffer = T::MatchBuffer(...);</div><div class="line">}</div></div><!-- fragment --><div class="fragment"><div class="line"><span class [...]
 <!-- start footer part -->
 <hr class="footer"/><address class="footer"><small>
 Generated by &#160;<a href="http://www.doxygen.org/index.html">
diff --git a/docs/reference/api/doxygen/annotated.html b/docs/reference/api/doxygen/annotated.html
index 4af4f4892..2816eafa6 100644
--- a/docs/reference/api/doxygen/annotated.html
+++ b/docs/reference/api/doxygen/annotated.html
@@ -681,12 +681,19 @@ $(function() {
 <tr id="row_1_8_0_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_1_8_0_" class="arrow" onclick="toggleFolder('1_8_0_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1script_1_1ir__builder.html" target="_self">ir_builder</a></td><td class="desc"></td></tr>
 <tr id="row_1_8_0_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_1_8_0_0_" class="arrow" onclick="toggleFolder('1_8_0_0_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1details.html" target="_self">details</a></td><td class="desc"></td></tr>
 <tr id="row_1_8_0_0_0_" class="even" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1details_1_1Namer.html" target="_self">Namer</a></td><td class="desc"></td></tr>
-<tr id="row_1_8_0_1_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilder.html" target="_self">IRBuilder</a></td><td class="desc">Managed reference to an <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderNode.html" title="A dialect-agnostic IRBuilder that constructs any IR of TVM. An idiomatic [...]
-<tr id="row_1_8_0_2_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html" target="_self">IRBuilderFrame</a></td><td class="desc">Managed reference to an <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html">IRBuilderFrameNode</a> </td></tr>
-<tr id="row_1_8_0_3_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html" target="_self">IRBuilderFrameNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_8_0_4_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderNode.html" target="_self">IRBuilderNode</a></td><td class="desc">A dialect-agnostic <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilder.html" title="Managed reference to an IRBuilderNode. ">IRBuilder</a> that constructs an [...]
-<tr id="row_1_8_0_5_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRModuleFrame.html" target="_self">IRModuleFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRModuleFrameNode.html" title="A frame that represents the IRModule frame with functions and g [...]
-<tr id="row_1_8_0_6_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRModuleFrameNode.html" target="_self">IRModuleFrameNode</a></td><td class="desc">A frame that represents the <a class="el" href="classtvm_1_1IRModule.html" title="Managed reference class to IRModuleNode. ">IRModule</a> frame with functions and g [...]
+<tr id="row_1_8_0_1_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_1_8_0_1_" class="arrow" onclick="toggleFolder('1_8_0_1_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1ir.html" target="_self">ir</a></td><td class="desc"></td></tr>
+<tr id="row_1_8_0_1_0_" class="even" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1ir_1_1IRModuleFrame.html" target="_self">IRModuleFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1ir_1_1IRModuleFrameNode.html" title="A frame that represents the IRModule frame with f [...]
+<tr id="row_1_8_0_1_1_" class="even" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1ir_1_1IRModuleFrameNode.html" target="_self">IRModuleFrameNode</a></td><td class="desc">A frame that represents the <a class="el" href="classtvm_1_1IRModule.html" title="Managed reference class to IRModuleNode. ">IRModule</a> frame with functio [...]
+<tr id="row_1_8_0_2_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span id="arr_1_8_0_2_" class="arrow" onclick="toggleFolder('1_8_0_2_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html" target="_self">tir</a></td><td class="desc"></td></tr>
+<tr id="row_1_8_0_2_0_" class="even" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AssertFrameNode.html" target="_self">AssertFrameNode</a></td><td class="desc">A frame that represents the assert statement. Proceeds if the condition is true, otherwise aborts with the message </td></tr>
+<tr id="row_1_8_0_2_1_" class="even" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1PrimFuncFrame.html" target="_self">PrimFuncFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1PrimFuncFrameNode.html" title="A frame that represents the PrimFunc containing [...]
+<tr id="row_1_8_0_2_2_" class="even" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1PrimFuncFrameNode.html" target="_self">PrimFuncFrameNode</a></td><td class="desc">A frame that represents the PrimFunc containing TIR statements </td></tr>
+<tr id="row_1_8_0_2_3_" class="even" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1TIRFrame.html" target="_self">TIRFrame</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1TIRFrameNode.html" title="A base frame that represents the TIR fame with body of stateme [...]
+<tr id="row_1_8_0_2_4_" class="even" style="display:none;"><td class="entry"><span style="width:80px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1TIRFrameNode.html" target="_self">TIRFrameNode</a></td><td class="desc">A base frame that represents the TIR fame with body of statements </td></tr>
+<tr id="row_1_8_0_3_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilder.html" target="_self">IRBuilder</a></td><td class="desc">Managed reference to an <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderNode.html" title="A dialect-agnostic IRBuilder that constructs any IR of TVM. An idiomatic [...]
+<tr id="row_1_8_0_4_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html" target="_self">IRBuilderFrame</a></td><td class="desc">Managed reference to an <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html">IRBuilderFrameNode</a> </td></tr>
+<tr id="row_1_8_0_5_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html" target="_self">IRBuilderFrameNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_8_0_6_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderNode.html" target="_self">IRBuilderNode</a></td><td class="desc">A dialect-agnostic <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilder.html" title="Managed reference to an IRBuilderNode. ">IRBuilder</a> that constructs an [...]
 <tr id="row_1_8_1_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_1_8_1_" class="arrow" onclick="toggleFolder('1_8_1_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1script_1_1printer.html" target="_self">printer</a></td><td class="desc"></td></tr>
 <tr id="row_1_8_1_0_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDoc.html" target="_self">AssertDoc</a></td><td class="desc">Reference type of <a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDocNode.html" title="Doc that represents assert statement. ">AssertDocNode</a> </td></tr>
 <tr id="row_1_8_1_1_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDocNode.html" target="_self">AssertDocNode</a></td><td class="desc"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html" title="Reference type of DocNode. ">Doc</a> that represents assert statement </td></tr>
diff --git a/docs/reference/api/doxygen/array_8h__dep__incl.svg b/docs/reference/api/doxygen/array_8h__dep__incl.svg
index 2d1bf796e..e59639452 100644
--- a/docs/reference/api/doxygen/array_8h__dep__incl.svg
+++ b/docs/reference/api/doxygen/array_8h__dep__incl.svg
@@ -189,9 +189,9 @@
 <path fill="none" stroke="#191970" d="M1922.9813,-797.1864C1823.2369,-778.64 1649,-737.9491 1649,-680 1649,-680 1649,-680 1649,-484.5 1649,-440.2885 1555.5645,-337.5363 1518.7013,-298.7178"/>
 <polygon fill="#191970" stroke="#191970" points="1922.5174,-800.6595 1932.9844,-799.0187 1923.7787,-793.7741 1922.5174,-800.6595"/>
 </g>
-<!-- Node143 -->
+<!-- Node144 -->
 <g id="node32" class="node">
-<title>Node143</title>
+<title>Node144</title>
 <g id="a_node32"><a xlink:href="meta__schedule_2cost__model_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/cost_model.h">
 <polygon fill="#ffffff" stroke="#000000" points="2535,-268.5 2535,-298.5 2687,-298.5 2687,-268.5 2535,-268.5"/>
 <text text-anchor="start" x="2543" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -199,15 +199,15 @@
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node143 -->
+<!-- Node19&#45;&gt;Node144 -->
 <g id="edge105" class="edge">
-<title>Node19&#45;&gt;Node143</title>
+<title>Node19&#45;&gt;Node144</title>
 <path fill="none" stroke="#191970" d="M2059.2745,-805.014C2191.5045,-797.8435 2477.9694,-780.1516 2574,-757 2662.5776,-735.6452 2764,-771.1154 2764,-680 2764,-680 2764,-680 2764,-417.5 2764,-379.8735 2769.6718,-363.4087 2745,-335 2729.9653,-317.6882 2708.4307,-306.168 2687.0097,-298.5125"/>
 <polygon fill="#191970" stroke="#191970" points="2059.0575,-801.5205 2049.2601,-805.5529 2059.4337,-808.5104 2059.0575,-801.5205"/>
 </g>
-<!-- Node144 -->
+<!-- Node145 -->
 <g id="node33" class="node">
-<title>Node144</title>
+<title>Node145</title>
 <g id="a_node33"><a xlink:href="measure__candidate_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_candidate.h">
 <polygon fill="#ffffff" stroke="#000000" points="2584,-335.5 2584,-365.5 2736,-365.5 2736,-335.5 2584,-335.5"/>
 <text text-anchor="start" x="2592" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -215,15 +215,15 @@
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node144 -->
+<!-- Node19&#45;&gt;Node145 -->
 <g id="edge110" class="edge">
-<title>Node19&#45;&gt;Node144</title>
+<title>Node19&#45;&gt;Node145</title>
 <path fill="none" stroke="#191970" d="M2059.5575,-804.5949C2195.6124,-796.465 2490.2624,-776.824 2532,-757 2575.434,-736.3703 2608,-728.0843 2608,-680 2608,-680 2608,-680 2608,-484.5 2608,-438.8339 2634.5721,-389.9818 2649.7265,-365.8342"/>
 <polygon fill="#191970" stroke="#191970" points="2059.0291,-801.12 2049.2542,-805.2066 2059.4441,-808.1077 2059.0291,-801.12"/>
 </g>
-<!-- Node145 -->
+<!-- Node146 -->
 <g id="node34" class="node">
-<title>Node145</title>
+<title>Node146</title>
 <g id="a_node34"><a xlink:href="feature__extractor_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/feature_extractor.h">
 <polygon fill="#ffffff" stroke="#000000" points="2895,-268.5 2895,-298.5 3047,-298.5 3047,-268.5 2895,-268.5"/>
 <text text-anchor="start" x="2903" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -231,15 +231,15 @@
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node145 -->
+<!-- Node19&#45;&gt;Node146 -->
 <g id="edge108" class="edge">
-<title>Node19&#45;&gt;Node145</title>
+<title>Node19&#45;&gt;Node146</title>
 <path fill="none" stroke="#191970" d="M2059.2922,-806.938C2229.6646,-802.6055 2670.0215,-788.6108 2813,-757 2904.7895,-736.7065 3010,-774.006 3010,-680 3010,-680 3010,-680 3010,-417.5 3010,-372.9183 2989.8525,-323.1982 2978.5196,-298.7571"/>
 <polygon fill="#191970" stroke="#191970" points="2059.1027,-803.4416 2049.1937,-807.1915 2059.2784,-810.4394 2059.1027,-803.4416"/>
 </g>
-<!-- Node146 -->
+<!-- Node147 -->
 <g id="node35" class="node">
-<title>Node146</title>
+<title>Node147</title>
 <g id="a_node35"><a xlink:href="runner_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/runner.h">
 <polygon fill="#ffffff" stroke="#000000" points="2792,-335.5 2792,-365.5 2944,-365.5 2944,-335.5 2792,-335.5"/>
 <text text-anchor="start" x="2800" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -247,15 +247,15 @@
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node146 -->
+<!-- Node19&#45;&gt;Node147 -->
 <g id="edge112" class="edge">
-<title>Node19&#45;&gt;Node146</title>
+<title>Node19&#45;&gt;Node147</title>
 <path fill="none" stroke="#191970" d="M2059.3517,-807.2975C2239.1476,-803.6763 2715.766,-790.9543 2778,-757 2815.3774,-736.6072 2835,-722.5786 2835,-680 2835,-680 2835,-680 2835,-484.5 2835,-440.4155 2852.0479,-390.4779 2861.6373,-365.8751"/>
 <polygon fill="#191970" stroke="#191970" points="2059.1502,-803.8007 2049.2216,-807.4981 2059.2889,-810.7993 2059.1502,-803.8007"/>
 </g>
-<!-- Node147 -->
+<!-- Node148 -->
 <g id="node36" class="node">
-<title>Node147</title>
+<title>Node148</title>
 <g id="a_node36"><a xlink:href="space__generator_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/space_generator.h">
 <polygon fill="#ffffff" stroke="#000000" points="1750,-335.5 1750,-365.5 1902,-365.5 1902,-335.5 1750,-335.5"/>
 <text text-anchor="start" x="1758" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -263,45 +263,45 @@
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node147 -->
+<!-- Node19&#45;&gt;Node148 -->
 <g id="edge116" class="edge">
-<title>Node19&#45;&gt;Node147</title>
+<title>Node19&#45;&gt;Node148</title>
 <path fill="none" stroke="#191970" d="M1954.7584,-787.8593C1921.3792,-765.8519 1877,-727.635 1877,-680 1877,-680 1877,-680 1877,-484.5 1877,-438.9479 1850.9389,-390.0469 1836.076,-365.862"/>
 <polygon fill="#191970" stroke="#191970" points="1953.0891,-790.9458 1963.4038,-793.366 1956.8497,-785.0417 1953.0891,-790.9458"/>
 </g>
-<!-- Node153 -->
+<!-- Node154 -->
 <g id="node38" class="node">
-<title>Node153</title>
+<title>Node154</title>
 <g id="a_node38"><a xlink:href="ir_2function_8h.html" target="_top" xlink:title="Function nodes. ">
 <polygon fill="#ffffff" stroke="#000000" points="2019,-670.5 2019,-689.5 2155,-689.5 2155,-670.5 2019,-670.5"/>
 <text text-anchor="middle" x="2087" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/function.h</text>
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node153 -->
+<!-- Node19&#45;&gt;Node154 -->
 <g id="edge85" class="edge">
-<title>Node19&#45;&gt;Node153</title>
+<title>Node19&#45;&gt;Node154</title>
 <path fill="none" stroke="#191970" d="M2008.5165,-785.0535C2029.788,-756.5807 2064.6846,-709.8701 2079.7266,-689.7358"/>
 <polygon fill="#191970" stroke="#191970" points="2005.5706,-783.1488 2002.3894,-793.2548 2011.1784,-787.3383 2005.5706,-783.1488"/>
 </g>
-<!-- Node159 -->
+<!-- Node161 -->
 <g id="node43" class="node">
-<title>Node159</title>
+<title>Node161</title>
 <g id="a_node43"><a xlink:href="ir_2type_8h.html" target="_top" xlink:title="IR/AST nodes for the unified type system in TVM. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="355,-732 355,-751 473,-751 473,-732 355,-732"/>
 <text text-anchor="middle" x="414" y="-739" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type.h</text>
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node159 -->
+<!-- Node19&#45;&gt;Node161 -->
 <g id="edge100" class="edge">
-<title>Node19&#45;&gt;Node159</title>
+<title>Node19&#45;&gt;Node161</title>
 <path fill="none" stroke="#191970" d="M1922.404,-807.6427C1667.3447,-804.2221 773.8667,-790.0987 491,-757 479.2169,-755.6212 466.5496,-753.3895 454.96,-751.0437"/>
 <polygon fill="#191970" stroke="#191970" points="1922.5934,-811.1455 1932.6391,-807.7788 1922.6865,-804.1461 1922.5934,-811.1455"/>
 </g>
-<!-- Node152 -->
+<!-- Node153 -->
 <g id="node44" class="node">
-<title>Node152</title>
+<title>Node153</title>
 <g id="a_node44"><a xlink:href="schedule__rule_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/schedule_rule.h">
 <polygon fill="#ffffff" stroke="#000000" points="1958,-335.5 1958,-365.5 2110,-365.5 2110,-335.5 1958,-335.5"/>
 <text text-anchor="start" x="1966" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -309,15 +309,15 @@
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node152 -->
+<!-- Node19&#45;&gt;Node153 -->
 <g id="edge113" class="edge">
-<title>Node19&#45;&gt;Node152</title>
+<title>Node19&#45;&gt;Node153</title>
 <path fill="none" stroke="#191970" d="M1991,-783.3849C1991,-757.4823 1991,-715.9175 1991,-680 1991,-680 1991,-680 1991,-484.5 1991,-439.5445 2013.2139,-389.9879 2025.7092,-365.6684"/>
 <polygon fill="#191970" stroke="#191970" points="1987.5001,-783.4649 1991,-793.4649 1994.5001,-783.465 1987.5001,-783.4649"/>
 </g>
-<!-- Node198 -->
+<!-- Node200 -->
 <g id="node45" class="node">
-<title>Node198</title>
+<title>Node200</title>
 <g id="a_node45"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="3155.5,-726.5 3155.5,-756.5 3306.5,-756.5 3306.5,-726.5 3155.5,-726.5"/>
 <text text-anchor="start" x="3163.5" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/structural</text>
@@ -325,15 +325,15 @@
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node198 -->
+<!-- Node19&#45;&gt;Node200 -->
 <g id="edge119" class="edge">
-<title>Node19&#45;&gt;Node198</title>
+<title>Node19&#45;&gt;Node200</title>
 <path fill="none" stroke="#191970" d="M2059.4914,-806.5657C2273.7238,-800.3368 2929.3774,-779.8698 3141,-757 3145.729,-756.4889 3150.5856,-755.8848 3155.4749,-755.2164"/>
 <polygon fill="#191970" stroke="#191970" points="2059.2193,-803.0721 2049.3249,-806.8603 2059.4221,-810.0691 2059.2193,-803.0721"/>
 </g>
-<!-- Node210 -->
+<!-- Node212 -->
 <g id="node46" class="node">
-<title>Node210</title>
+<title>Node212</title>
 <g id="a_node46"><a xlink:href="papi_8h.html" target="_top" xlink:title="include/tvm/runtime\l/contrib/papi.h">
 <polygon fill="#ffffff" stroke="#000000" points="3325,-726.5 3325,-756.5 3441,-756.5 3441,-726.5 3325,-726.5"/>
 <text text-anchor="start" x="3333" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -341,15 +341,15 @@
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node210 -->
+<!-- Node19&#45;&gt;Node212 -->
 <g id="edge120" class="edge">
-<title>Node19&#45;&gt;Node210</title>
+<title>Node19&#45;&gt;Node212</title>
 <path fill="none" stroke="#191970" d="M2059.3601,-807.5454C2294.5482,-803.9949 3069.3896,-790.0181 3316,-757 3318.8401,-756.6198 3321.7365,-756.1746 3324.6537,-755.68"/>
 <polygon fill="#191970" stroke="#191970" points="2059.1789,-804.0476 2049.2324,-807.6969 2059.2836,-811.0469 2059.1789,-804.0476"/>
 </g>
-<!-- Node211 -->
+<!-- Node213 -->
 <g id="node47" class="node">
-<title>Node211</title>
+<title>Node213</title>
 <g id="a_node47"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="2180,-402.5 2180,-432.5 2296,-432.5 2296,-402.5 2180,-402.5"/>
 <text text-anchor="start" x="2188" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -357,45 +357,45 @@
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node211 -->
+<!-- Node19&#45;&gt;Node213 -->
 <g id="edge121" class="edge">
-<title>Node19&#45;&gt;Node211</title>
+<title>Node19&#45;&gt;Node213</title>
 <path fill="none" stroke="#191970" d="M2059.1271,-801.5331C2131.01,-789.9744 2234,-759.5356 2234,-680 2234,-680 2234,-680 2234,-551.5 2234,-508.4426 2236.0889,-457.6375 2237.2476,-432.7685"/>
 <polygon fill="#191970" stroke="#191970" points="2058.5647,-798.0783 2049.2046,-803.0419 2059.6171,-804.9988 2058.5647,-798.0783"/>
 </g>
-<!-- Node188 -->
+<!-- Node190 -->
 <g id="node48" class="node">
-<title>Node188</title>
+<title>Node190</title>
 <g id="a_node48"><a xlink:href="buffer_8h.html" target="_top" xlink:title="Symbolic n&#45;dimensional array, to represent a memory buffer. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="1493,-609 1493,-628 1621,-628 1621,-609 1493,-609"/>
 <text text-anchor="middle" x="1557" y="-616" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/buffer.h</text>
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node188 -->
+<!-- Node19&#45;&gt;Node190 -->
 <g id="edge137" class="edge">
-<title>Node19&#45;&gt;Node188</title>
+<title>Node19&#45;&gt;Node190</title>
 <path fill="none" stroke="#191970" d="M1922.7702,-805.7139C1832.7867,-800.8569 1680.51,-788.1872 1635,-757 1587.8787,-724.7086 1565.9631,-654.0839 1559.2999,-628.2309"/>
 <polygon fill="#191970" stroke="#191970" points="1922.7181,-809.2158 1932.8868,-806.2412 1923.0826,-802.2253 1922.7181,-809.2158"/>
 </g>
-<!-- Node189 -->
+<!-- Node191 -->
 <g id="node49" class="node">
-<title>Node189</title>
+<title>Node191</title>
 <g id="a_node49"><a xlink:href="tir_2expr_8h.html" target="_top" xlink:title="TIR expressions. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="1404.5,-542 1404.5,-561 1525.5,-561 1525.5,-542 1404.5,-542"/>
 <text text-anchor="middle" x="1465" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/expr.h</text>
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node189 -->
+<!-- Node19&#45;&gt;Node191 -->
 <g id="edge143" class="edge">
-<title>Node19&#45;&gt;Node189</title>
+<title>Node19&#45;&gt;Node191</title>
 <path fill="none" stroke="#191970" d="M1922.4931,-804.9924C1825.7704,-799.1139 1654.8435,-785.0534 1600,-757 1533.1014,-722.7802 1519.4461,-700.257 1484,-634 1471.1042,-609.8947 1466.9176,-577.2662 1465.5923,-561.2249"/>
 <polygon fill="#191970" stroke="#191970" points="1922.5712,-808.5031 1932.7612,-805.6027 1922.9865,-801.5155 1922.5712,-808.5031"/>
 </g>
-<!-- Node194 -->
+<!-- Node196 -->
 <g id="node50" class="node">
-<title>Node194</title>
+<title>Node196</title>
 <g id="a_node50"><a xlink:href="index__map_8h.html" target="_top" xlink:title="Defines a remapping of buffer indices. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="3459,-726.5 3459,-756.5 3577,-756.5 3577,-726.5 3459,-726.5"/>
 <text text-anchor="start" x="3467" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/index</text>
@@ -403,9 +403,9 @@
 </a>
 </g>
 </g>
-<!-- Node19&#45;&gt;Node194 -->
+<!-- Node19&#45;&gt;Node196 -->
 <g id="edge144" class="edge">
-<title>Node19&#45;&gt;Node194</title>
+<title>Node19&#45;&gt;Node196</title>
 <path fill="none" stroke="#191970" d="M2059.4385,-807.7985C2309.857,-804.9647 3175.925,-792.7476 3450,-757 3452.8816,-756.6242 3455.8206,-756.1825 3458.7808,-755.6908"/>
 <polygon fill="#191970" stroke="#191970" points="2059.3433,-804.2992 2049.3831,-807.911 2059.4217,-811.2988 2059.3433,-804.2992"/>
 </g>
@@ -636,24 +636,24 @@
 <path fill="none" stroke="#191970" d="M976.8853,-614.5319C855.7583,-607.5829 631.8619,-591.5712 605,-567 586.3325,-549.9244 585.1317,-518.2514 586.3154,-499.5084"/>
 <polygon fill="#191970" stroke="#191970" points="977.0251,-618.0453 987.207,-615.1163 977.4209,-611.0565 977.0251,-618.0453"/>
 </g>
-<!-- Node140 -->
+<!-- Node141 -->
 <g id="node29" class="node">
-<title>Node140</title>
+<title>Node141</title>
 <g id="a_node29"><a xlink:href="error_8h.html" target="_top" xlink:title="Utilities for error tracking and reporting. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="849.5,-542 849.5,-561 968.5,-561 968.5,-542 849.5,-542"/>
 <text text-anchor="middle" x="909" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/error.h</text>
 </a>
 </g>
 </g>
-<!-- Node21&#45;&gt;Node140 -->
+<!-- Node21&#45;&gt;Node141 -->
 <g id="edge45" class="edge">
-<title>Node21&#45;&gt;Node140</title>
+<title>Node21&#45;&gt;Node141</title>
 <path fill="none" stroke="#191970" d="M1023.8577,-604.5722C995.6873,-591.5555 954.3763,-572.467 929.7364,-561.0817"/>
 <polygon fill="#191970" stroke="#191970" points="1022.6792,-607.8831 1033.225,-608.9005 1025.6154,-601.5287 1022.6792,-607.8831"/>
 </g>
-<!-- Node141 -->
+<!-- Node142 -->
 <g id="node30" class="node">
-<title>Node141</title>
+<title>Node142</title>
 <g id="a_node30"><a xlink:href="global__var__supply_8h.html" target="_top" xlink:title="GlobalVarSupply that can be used to generate unique. ">
 <polygon fill="#ffffff" stroke="#000000" points="965.5,-469.5 965.5,-499.5 1082.5,-499.5 1082.5,-469.5 965.5,-469.5"/>
 <text text-anchor="start" x="973.5" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/global</text>
@@ -661,15 +661,15 @@
 </a>
 </g>
 </g>
-<!-- Node21&#45;&gt;Node141 -->
+<!-- Node21&#45;&gt;Node142 -->
 <g id="edge47" class="edge">
-<title>Node21&#45;&gt;Node141</title>
+<title>Node21&#45;&gt;Node142</title>
 <path fill="none" stroke="#191970" d="M1054.9467,-598.3197C1055.2287,-581.4017 1054.4367,-556.7517 1049,-536 1045.6248,-523.1169 1038.9333,-509.6104 1033.3221,-499.663"/>
 <polygon fill="#191970" stroke="#191970" points="1051.4379,-598.5496 1054.6238,-608.6541 1058.4345,-598.7683 1051.4379,-598.5496"/>
 </g>
-<!-- Node142 -->
+<!-- Node143 -->
 <g id="node31" class="node">
-<title>Node142</title>
+<title>Node143</title>
 <g id="a_node31"><a xlink:href="arg__info_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/arg_info.h">
 <polygon fill="#ffffff" stroke="#000000" points="2338,-402.5 2338,-432.5 2490,-432.5 2490,-402.5 2338,-402.5"/>
 <text text-anchor="start" x="2346" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -677,21 +677,21 @@
 </a>
 </g>
 </g>
-<!-- Node21&#45;&gt;Node142 -->
+<!-- Node21&#45;&gt;Node143 -->
 <g id="edge51" class="edge">
-<title>Node21&#45;&gt;Node142</title>
+<title>Node21&#45;&gt;Node143</title>
 <path fill="none" stroke="#191970" d="M1130.8889,-614.809C1284.4531,-607.0935 1624.2162,-588.2062 1739,-567 1888.6136,-539.359 1918.4832,-502.0302 2067,-469 2080.0977,-466.0871 2239.6385,-442.7741 2337.7688,-428.5317"/>
 <polygon fill="#191970" stroke="#191970" points="1130.4162,-611.3282 1120.6034,-615.323 1130.7657,-618.3194 1130.4162,-611.3282"/>
 </g>
-<!-- Node21&#45;&gt;Node147 -->
+<!-- Node21&#45;&gt;Node148 -->
 <g id="edge72" class="edge">
-<title>Node21&#45;&gt;Node147</title>
+<title>Node21&#45;&gt;Node148</title>
 <path fill="none" stroke="#191970" d="M1130.907,-615.1091C1291.0369,-607.6546 1649.6718,-588.761 1701,-567 1706.1822,-564.803 1773.9036,-504.7005 1777,-500 1805.8484,-456.2061 1818.9867,-394.0093 1823.7528,-365.648"/>
 <polygon fill="#191970" stroke="#191970" points="1130.4795,-611.6251 1120.652,-615.5835 1130.803,-618.6176 1130.4795,-611.6251"/>
 </g>
-<!-- Node148 -->
+<!-- Node149 -->
 <g id="node37" class="node">
-<title>Node148</title>
+<title>Node149</title>
 <g id="a_node37"><a xlink:href="state_8h.html" target="_top" xlink:title="This file defines ScheduleState, the core data structure of TensorIR scheduling. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="1411,-402.5 1411,-432.5 1545,-432.5 1545,-402.5 1411,-402.5"/>
 <text text-anchor="start" x="1419" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/schedule</text>
@@ -699,9 +699,9 @@
 </a>
 </g>
 </g>
-<!-- Node21&#45;&gt;Node148 -->
+<!-- Node21&#45;&gt;Node149 -->
 <g id="edge83" class="edge">
-<title>Node21&#45;&gt;Node148</title>
+<title>Node21&#45;&gt;Node149</title>
 <path fill="none" stroke="#191970" d="M1130.9831,-607.1693C1172.5302,-599.2754 1223.9687,-586.5702 1267,-567 1346.8951,-530.6645 1427.817,-462.6382 1461.603,-432.5145"/>
 <polygon fill="#191970" stroke="#191970" points="1130.1178,-603.7698 1120.9181,-609.0249 1131.387,-610.6537 1130.1178,-603.7698"/>
 </g>
@@ -983,372 +983,372 @@
 <path fill="none" stroke="#191970" d="M249.3004,-131.8486C239.421,-119.1194 226.4626,-102.4229 218.4365,-92.0817"/>
 <polygon fill="#191970" stroke="#191970" points="246.6534,-134.1466 255.5497,-139.9005 252.1834,-129.8547 246.6534,-134.1466"/>
 </g>
-<!-- Node140&#45;&gt;Node25 -->
+<!-- Node141&#45;&gt;Node25 -->
 <g id="edge46" class="edge">
-<title>Node140&#45;&gt;Node25</title>
+<title>Node141&#45;&gt;Node25</title>
 <path fill="none" stroke="#191970" d="M891.2042,-534.94C877.393,-522.0879 858.7303,-504.7212 847.2967,-494.0817"/>
 <polygon fill="#191970" stroke="#191970" points="888.9791,-537.6504 898.6842,-541.9005 893.7478,-532.5259 888.9791,-537.6504"/>
 </g>
-<!-- Node141&#45;&gt;Node22 -->
+<!-- Node142&#45;&gt;Node22 -->
 <g id="edge48" class="edge">
-<title>Node141&#45;&gt;Node22</title>
+<title>Node142&#45;&gt;Node22</title>
 <path fill="none" stroke="#191970" d="M991.7159,-463.2983C981.7832,-455.0691 971.9167,-444.7725 966,-433 943.6544,-388.5386 947.4161,-327.2019 950.7669,-298.9207"/>
 <polygon fill="#191970" stroke="#191970" points="989.5784,-466.0698 999.6218,-469.4433 993.8743,-460.5429 989.5784,-466.0698"/>
 </g>
-<!-- Node142&#45;&gt;Node48 -->
+<!-- Node143&#45;&gt;Node48 -->
 <g id="edge55" class="edge">
-<title>Node142&#45;&gt;Node48</title>
+<title>Node143&#45;&gt;Node48</title>
 <path fill="none" stroke="#191970" d="M2381.0107,-397.3728C2348.3843,-378.382 2296.4512,-350.5681 2248,-335 2167.5978,-309.1656 2071.4079,-295.89 2006.028,-289.346"/>
 <polygon fill="#191970" stroke="#191970" points="2379.2888,-400.4206 2389.682,-402.4778 2382.8401,-394.3883 2379.2888,-400.4206"/>
 </g>
-<!-- Node142&#45;&gt;Node49 -->
+<!-- Node143&#45;&gt;Node49 -->
 <g id="edge67" class="edge">
-<title>Node142&#45;&gt;Node49</title>
+<title>Node143&#45;&gt;Node49</title>
 <path fill="none" stroke="#191970" d="M2420.8087,-392.6171C2432.3705,-350.364 2455.5,-265.8364 2464.8931,-231.5088"/>
 <polygon fill="#191970" stroke="#191970" points="2417.3974,-391.8231 2418.1339,-402.3923 2424.1492,-393.6707 2417.3974,-391.8231"/>
 </g>
-<!-- Node142&#45;&gt;Node143 -->
+<!-- Node143&#45;&gt;Node144 -->
 <g id="edge52" class="edge">
-<title>Node142&#45;&gt;Node143</title>
+<title>Node143&#45;&gt;Node144</title>
 <path fill="none" stroke="#191970" d="M2446.6561,-396.9168C2461.2367,-387.6141 2478.5735,-376.3975 2494,-366 2528.0067,-343.0794 2566.7808,-315.4327 2590.0498,-298.6738"/>
 <polygon fill="#191970" stroke="#191970" points="2444.5355,-394.1175 2437.9756,-402.4372 2448.292,-400.0242 2444.5355,-394.1175"/>
 </g>
-<!-- Node142&#45;&gt;Node144 -->
+<!-- Node143&#45;&gt;Node145 -->
 <g id="edge56" class="edge">
-<title>Node142&#45;&gt;Node144</title>
+<title>Node143&#45;&gt;Node145</title>
 <path fill="none" stroke="#191970" d="M2478.9155,-399.8198C2518.016,-389.1704 2567.2817,-375.7525 2604.552,-365.6017"/>
 <polygon fill="#191970" stroke="#191970" points="2477.936,-396.459 2469.2072,-402.4639 2479.7755,-403.213 2477.936,-396.459"/>
 </g>
-<!-- Node142&#45;&gt;Node146 -->
+<!-- Node143&#45;&gt;Node147 -->
 <g id="edge61" class="edge">
-<title>Node142&#45;&gt;Node146</title>
+<title>Node143&#45;&gt;Node147</title>
 <path fill="none" stroke="#191970" d="M2500.2951,-404.7648C2584.4856,-392.3402 2711.3131,-373.6234 2791.8527,-361.7376"/>
 <polygon fill="#191970" stroke="#191970" points="2499.6271,-401.3254 2490.2453,-406.2479 2500.6492,-408.2504 2499.6271,-401.3254"/>
 </g>
-<!-- Node143&#45;&gt;Node46 -->
+<!-- Node144&#45;&gt;Node46 -->
 <g id="edge54" class="edge">
-<title>Node143&#45;&gt;Node46</title>
+<title>Node144&#45;&gt;Node46</title>
 <path fill="none" stroke="#191970" d="M2626.3313,-259.7498C2661.2057,-205.7246 2745.6649,-74.8861 2774.3105,-30.5103"/>
 <polygon fill="#191970" stroke="#191970" points="2623.2798,-258.0235 2620.7969,-268.3233 2629.1609,-261.8199 2623.2798,-258.0235"/>
 </g>
-<!-- Node143&#45;&gt;Node49 -->
+<!-- Node144&#45;&gt;Node49 -->
 <g id="edge53" class="edge">
-<title>Node143&#45;&gt;Node49</title>
+<title>Node144&#45;&gt;Node49</title>
 <path fill="none" stroke="#191970" d="M2570.0747,-264.1902C2548.1989,-253.8685 2521.6504,-241.3421 2501.2525,-231.7177"/>
 <polygon fill="#191970" stroke="#191970" points="2568.595,-267.362 2579.1324,-268.4639 2571.5821,-261.0313 2568.595,-267.362"/>
 </g>
-<!-- Node144&#45;&gt;Node45 -->
+<!-- Node145&#45;&gt;Node45 -->
 <g id="edge59" class="edge">
-<title>Node144&#45;&gt;Node45</title>
+<title>Node145&#45;&gt;Node45</title>
 <path fill="none" stroke="#191970" d="M2680.8315,-327.126C2686.9469,-318.8268 2692.8389,-309.0395 2696,-299 2700.1379,-285.8583 2699.1438,-281.4143 2696,-268 2680.0872,-200.1004 2635.4585,-128.5823 2614.7748,-97.9233"/>
 <polygon fill="#191970" stroke="#191970" points="2677.9223,-325.163 2674.4829,-335.184 2683.4208,-329.4951 2677.9223,-325.163"/>
 </g>
-<!-- Node144&#45;&gt;Node49 -->
+<!-- Node145&#45;&gt;Node49 -->
 <g id="edge60" class="edge">
-<title>Node144&#45;&gt;Node49</title>
+<title>Node145&#45;&gt;Node49</title>
 <path fill="none" stroke="#191970" d="M2589.107,-332.1526C2567.4088,-324.294 2544.4282,-313.4969 2526,-299 2502.6027,-280.594 2485.0303,-249.9672 2475.9406,-231.6583"/>
 <polygon fill="#191970" stroke="#191970" points="2588.042,-335.4877 2598.6367,-335.4509 2590.3315,-328.8726 2588.042,-335.4877"/>
 </g>
-<!-- Node144&#45;&gt;Node143 -->
+<!-- Node145&#45;&gt;Node144 -->
 <g id="edge57" class="edge">
-<title>Node144&#45;&gt;Node143</title>
+<title>Node145&#45;&gt;Node144</title>
 <path fill="none" stroke="#191970" d="M2642.8592,-327.0626C2635.9619,-317.6315 2628.2068,-307.0276 2622.0749,-298.6432"/>
 <polygon fill="#191970" stroke="#191970" points="2640.1528,-329.2911 2648.8811,-335.2967 2645.803,-325.1588 2640.1528,-329.2911"/>
 </g>
-<!-- Node144&#45;&gt;Node145 -->
+<!-- Node145&#45;&gt;Node146 -->
 <g id="edge58" class="edge">
-<title>Node144&#45;&gt;Node145</title>
+<title>Node145&#45;&gt;Node146</title>
 <path fill="none" stroke="#191970" d="M2739.5832,-333.3551C2789.4924,-322.6029 2853.2281,-308.8721 2901.17,-298.5438"/>
 <polygon fill="#191970" stroke="#191970" points="2738.8331,-329.9363 2729.7945,-335.4639 2740.3073,-336.7793 2738.8331,-329.9363"/>
 </g>
-<!-- Node146&#45;&gt;Node45 -->
+<!-- Node147&#45;&gt;Node45 -->
 <g id="edge63" class="edge">
-<title>Node146&#45;&gt;Node45</title>
+<title>Node147&#45;&gt;Node45</title>
 <path fill="none" stroke="#191970" d="M2847.1736,-327.9185C2810.2132,-288.0497 2730.4069,-202.8874 2660,-134 2647.1601,-121.4372 2632.1579,-107.6877 2620.9146,-97.5579"/>
 <polygon fill="#191970" stroke="#191970" points="2844.8239,-330.5325 2854.1854,-335.4936 2849.9609,-325.7774 2844.8239,-330.5325"/>
 </g>
-<!-- Node146&#45;&gt;Node46 -->
+<!-- Node147&#45;&gt;Node46 -->
 <g id="edge65" class="edge">
-<title>Node146&#45;&gt;Node46</title>
+<title>Node147&#45;&gt;Node46</title>
 <path fill="none" stroke="#191970" d="M2867.6978,-325.1096C2867.4034,-298.1602 2867,-254.3276 2867,-216.5 2867,-216.5 2867,-216.5 2867,-149.5 2867,-99.9692 2825.5089,-53.4923 2801.1977,-30.5111"/>
 <polygon fill="#191970" stroke="#191970" points="2864.1992,-325.2675 2867.8119,-335.2273 2871.1988,-325.1885 2864.1992,-325.2675"/>
 </g>
-<!-- Node146&#45;&gt;Node47 -->
+<!-- Node147&#45;&gt;Node47 -->
 <g id="edge66" class="edge">
-<title>Node146&#45;&gt;Node47</title>
+<title>Node147&#45;&gt;Node47</title>
 <path fill="none" stroke="#191970" d="M2840.8722,-329.1349C2816.9468,-311.0478 2780.5993,-285.3656 2746,-268 2665.2236,-227.4579 2642.2556,-220.4793 2554,-201 2398.7507,-166.7343 2212.3685,-155.2484 2110.2343,-151.4117"/>
 <polygon fill="#191970" stroke="#191970" points="2839.0288,-332.1313 2849.0985,-335.4254 2843.2809,-326.5707 2839.0288,-332.1313"/>
 </g>
-<!-- Node146&#45;&gt;Node49 -->
+<!-- Node147&#45;&gt;Node49 -->
 <g id="edge64" class="edge">
-<title>Node146&#45;&gt;Node49</title>
+<title>Node147&#45;&gt;Node49</title>
 <path fill="none" stroke="#191970" d="M2832.3026,-330.4564C2798.1951,-312.017 2744.8852,-285.0271 2696,-268 2646.8657,-250.8862 2589.7366,-237.9601 2545.0823,-229.3383"/>
 <polygon fill="#191970" stroke="#191970" points="2830.9388,-333.6993 2841.3938,-335.4151 2834.2908,-327.554 2830.9388,-333.6993"/>
 </g>
-<!-- Node146&#45;&gt;Node143 -->
+<!-- Node147&#45;&gt;Node144 -->
 <g id="edge62" class="edge">
-<title>Node146&#45;&gt;Node143</title>
+<title>Node147&#45;&gt;Node144</title>
 <path fill="none" stroke="#191970" d="M2800.4412,-332.8874C2759.551,-322.2273 2707.9467,-308.774 2668.9274,-298.6017"/>
 <polygon fill="#191970" stroke="#191970" points="2799.7646,-336.3279 2810.3242,-335.4639 2801.5306,-329.5543 2799.7646,-336.3279"/>
 </g>
-<!-- Node147&#45;&gt;Node47 -->
+<!-- Node148&#45;&gt;Node47 -->
 <g id="edge73" class="edge">
-<title>Node147&#45;&gt;Node47</title>
+<title>Node148&#45;&gt;Node47</title>
 <path fill="none" stroke="#191970" d="M1826.82,-325.3425C1828.5074,-307.8507 1833.0721,-284.7587 1845,-268 1881.7178,-216.4114 1948.6278,-182.3828 1992.1869,-164.5272"/>
 <polygon fill="#191970" stroke="#191970" points="1823.3229,-325.1792 1826.1011,-335.4032 1830.3051,-325.6782 1823.3229,-325.1792"/>
 </g>
-<!-- Node153&#45;&gt;Node21 -->
+<!-- Node154&#45;&gt;Node21 -->
 <g id="edge86" class="edge">
-<title>Node153&#45;&gt;Node21</title>
+<title>Node154&#45;&gt;Node21</title>
 <path fill="none" stroke="#191970" d="M2008.601,-675.3325C1812.5104,-663.6582 1303.4505,-633.3511 1120.6119,-622.4658"/>
 <polygon fill="#191970" stroke="#191970" points="2008.6321,-678.8405 2018.8225,-675.941 2009.0482,-671.8528 2008.6321,-678.8405"/>
 </g>
-<!-- Node153&#45;&gt;Node90 -->
+<!-- Node154&#45;&gt;Node90 -->
 <g id="edge87" class="edge">
-<title>Node153&#45;&gt;Node90</title>
+<title>Node154&#45;&gt;Node90</title>
 <path fill="none" stroke="#191970" d="M2008.4524,-679.5509C1694.2667,-677.1076 538.6481,-661.7032 408,-567 265.734,-463.8752 261.9638,-211.1694 262.751,-159.0114"/>
 <polygon fill="#191970" stroke="#191970" points="2008.8034,-683.0535 2018.8296,-679.6292 2008.8563,-676.0537 2008.8034,-683.0535"/>
 </g>
-<!-- Node154 -->
+<!-- Node155 -->
 <g id="node39" class="node">
-<title>Node154</title>
+<title>Node155</title>
 <g id="a_node39"><a xlink:href="script_2ir__builder_2base_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/base.h">
-<polygon fill="#ffffff" stroke="#000000" points="2019,-603.5 2019,-633.5 2123,-633.5 2123,-603.5 2019,-603.5"/>
+<polygon fill="#ffffff" stroke="#ff0000" points="2019,-603.5 2019,-633.5 2123,-633.5 2123,-603.5 2019,-603.5"/>
 <text text-anchor="start" x="2027" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
 <text text-anchor="middle" x="2071" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/base.h</text>
 </a>
 </g>
 </g>
-<!-- Node153&#45;&gt;Node154 -->
+<!-- Node154&#45;&gt;Node155 -->
 <g id="edge88" class="edge">
-<title>Node153&#45;&gt;Node154</title>
+<title>Node154&#45;&gt;Node155</title>
 <path fill="none" stroke="#191970" d="M2081.968,-660.6584C2079.6907,-651.9047 2077.0394,-641.7139 2074.914,-633.5446"/>
 <polygon fill="#191970" stroke="#191970" points="2078.5949,-661.594 2084.5,-670.3906 2085.3694,-659.8315 2078.5949,-661.594"/>
 </g>
-<!-- Node155 -->
+<!-- Node156 -->
 <g id="node40" class="node">
-<title>Node155</title>
+<title>Node156</title>
 <g id="a_node40"><a xlink:href="ir__builder_2ir_2frame_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/frame.h">
-<polygon fill="#ffffff" stroke="#000000" points="2045,-536.5 2045,-566.5 2161,-566.5 2161,-536.5 2045,-536.5"/>
+<polygon fill="#ffffff" stroke="#ff0000" points="2045,-536.5 2045,-566.5 2161,-566.5 2161,-536.5 2045,-536.5"/>
 <text text-anchor="start" x="2053" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
 <text text-anchor="middle" x="2103" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/frame.h</text>
 </a>
 </g>
 </g>
-<!-- Node153&#45;&gt;Node155 -->
+<!-- Node154&#45;&gt;Node156 -->
 <g id="edge91" class="edge">
-<title>Node153&#45;&gt;Node155</title>
+<title>Node154&#45;&gt;Node156</title>
 <path fill="none" stroke="#191970" d="M2109.2638,-663.6617C2118.1073,-655.7992 2127.2844,-645.5637 2132,-634 2141.4591,-610.8042 2126.6776,-583.237 2114.9737,-566.5695"/>
 <polygon fill="#191970" stroke="#191970" points="2106.8504,-661.1158 2101.3737,-670.1853 2111.3109,-666.5106 2106.8504,-661.1158"/>
 </g>
-<!-- Node156 -->
+<!-- Node157 -->
 <g id="node41" class="node">
-<title>Node156</title>
-<g id="a_node41"><a xlink:href="ir_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/ir.h">
+<title>Node157</title>
+<g id="a_node41"><a xlink:href="ir_2ir_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/ir.h">
 <polygon fill="#ffffff" stroke="#000000" points="2076,-469.5 2076,-499.5 2180,-499.5 2180,-469.5 2076,-469.5"/>
 <text text-anchor="start" x="2084" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
 <text text-anchor="middle" x="2128" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/ir.h</text>
 </a>
 </g>
 </g>
-<!-- Node153&#45;&gt;Node156 -->
+<!-- Node154&#45;&gt;Node157 -->
 <g id="edge92" class="edge">
-<title>Node153&#45;&gt;Node156</title>
+<title>Node154&#45;&gt;Node157</title>
 <path fill="none" stroke="#191970" d="M2113.1237,-664.9342C2124.8765,-657.0255 2138.0811,-646.3958 2147,-634 2165.3876,-608.4441 2164.8182,-598.0541 2170,-567 2172.2676,-553.4101 2174.6216,-548.9795 2170,-536 2165.0463,-522.0879 2154.5463,-509.2129 2145.2847,-499.8159"/>
 <polygon fill="#191970" stroke="#191970" points="2111.0561,-662.1017 2104.5142,-670.4357 2114.8254,-668.0003 2111.0561,-662.1017"/>
 </g>
-<!-- Node157 -->
+<!-- Node159 -->
 <g id="node42" class="node">
-<title>Node157</title>
+<title>Node159</title>
 <g id="a_node42"><a xlink:href="tir_2function_8h.html" target="_top" xlink:title="TIR Function. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="1481,-475 1481,-494 1621,-494 1621,-475 1481,-475"/>
 <text text-anchor="middle" x="1551" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/function.h</text>
 </a>
 </g>
 </g>
-<!-- Node153&#45;&gt;Node157 -->
+<!-- Node154&#45;&gt;Node159 -->
 <g id="edge93" class="edge">
-<title>Node153&#45;&gt;Node157</title>
+<title>Node154&#45;&gt;Node159</title>
 <path fill="none" stroke="#191970" d="M2051.3438,-666.9948C1950.5511,-630.2318 1665.1647,-526.1403 1577.1648,-494.0433"/>
 <polygon fill="#191970" stroke="#191970" points="2050.1908,-670.2998 2060.7848,-670.4383 2052.5895,-663.7235 2050.1908,-670.2998"/>
 </g>
-<!-- Node154&#45;&gt;Node155 -->
+<!-- Node155&#45;&gt;Node156 -->
 <g id="edge89" class="edge">
-<title>Node154&#45;&gt;Node155</title>
+<title>Node155&#45;&gt;Node156</title>
 <path fill="none" stroke="#191970" d="M2082.6092,-594.1932C2087.0075,-584.9844 2091.8855,-574.771 2095.7674,-566.6432"/>
 <polygon fill="#191970" stroke="#191970" points="2079.4129,-592.7646 2078.2613,-603.2967 2085.7294,-595.7815 2079.4129,-592.7646"/>
 </g>
-<!-- Node155&#45;&gt;Node156 -->
+<!-- Node156&#45;&gt;Node157 -->
 <g id="edge90" class="edge">
-<title>Node155&#45;&gt;Node156</title>
+<title>Node156&#45;&gt;Node157</title>
 <path fill="none" stroke="#191970" d="M2112.1783,-526.9021C2115.586,-517.7696 2119.3487,-507.6854 2122.3496,-499.6432"/>
 <polygon fill="#191970" stroke="#191970" points="2108.8897,-525.704 2108.6729,-536.2967 2115.448,-528.1512 2108.8897,-525.704"/>
 </g>
-<!-- Node157&#45;&gt;Node22 -->
+<!-- Node159&#45;&gt;Node22 -->
 <g id="edge94" class="edge">
-<title>Node157&#45;&gt;Node22</title>
+<title>Node159&#45;&gt;Node22</title>
 <path fill="none" stroke="#191970" d="M1558.6118,-465.3061C1564.6136,-446.6276 1569.6356,-418.6905 1554,-402 1470.1102,-312.4503 1107.8914,-429.6744 1003,-366 978.0891,-350.8778 963.9427,-318.1994 957.436,-298.8163"/>
 <polygon fill="#191970" stroke="#191970" points="1555.2743,-464.245 1555.2077,-474.8396 1561.8667,-466.5989 1555.2743,-464.245"/>
 </g>
-<!-- Node157&#45;&gt;Node94 -->
+<!-- Node159&#45;&gt;Node94 -->
 <g id="edge96" class="edge">
-<title>Node157&#45;&gt;Node94</title>
+<title>Node159&#45;&gt;Node94</title>
 <path fill="none" stroke="#191970" d="M1508.7279,-472.0779C1464.4547,-459.0678 1395.8731,-438.9145 1355.6062,-427.0817"/>
 <polygon fill="#191970" stroke="#191970" points="1507.752,-475.4391 1518.3332,-474.9005 1509.7256,-468.7231 1507.752,-475.4391"/>
 </g>
-<!-- Node157&#45;&gt;Node142 -->
+<!-- Node159&#45;&gt;Node143 -->
 <g id="edge95" class="edge">
-<title>Node157&#45;&gt;Node142</title>
+<title>Node159&#45;&gt;Node143</title>
 <path fill="none" stroke="#191970" d="M1631.1179,-480.9539C1769.0348,-474.4192 2060.1006,-458.7414 2305,-433 2315.6452,-431.8811 2326.8489,-430.51 2337.8575,-429.0506"/>
 <polygon fill="#191970" stroke="#191970" points="1630.879,-477.4612 1621.0546,-481.4273 1631.208,-484.4534 1630.879,-477.4612"/>
 </g>
-<!-- Node157&#45;&gt;Node148 -->
+<!-- Node159&#45;&gt;Node149 -->
 <g id="edge97" class="edge">
-<title>Node157&#45;&gt;Node148</title>
+<title>Node159&#45;&gt;Node149</title>
 <path fill="none" stroke="#191970" d="M1533.0106,-467.9892C1521.1894,-457.1395 1505.8453,-443.0566 1494.367,-432.5218"/>
 <polygon fill="#191970" stroke="#191970" points="1530.8069,-470.7173 1540.5409,-474.9005 1535.5401,-465.5601 1530.8069,-470.7173"/>
 </g>
-<!-- Node159&#45;&gt;Node20 -->
+<!-- Node161&#45;&gt;Node20 -->
 <g id="edge101" class="edge">
-<title>Node159&#45;&gt;Node20</title>
+<title>Node161&#45;&gt;Node20</title>
 <path fill="none" stroke="#191970" d="M391.4883,-726.286C374.4725,-714.7864 351.6391,-699.355 337.1189,-689.5419"/>
 <polygon fill="#191970" stroke="#191970" points="389.5361,-729.191 399.7813,-731.8906 393.4557,-723.3913 389.5361,-729.191"/>
 </g>
-<!-- Node159&#45;&gt;Node21 -->
+<!-- Node161&#45;&gt;Node21 -->
 <g id="edge102" class="edge">
-<title>Node159&#45;&gt;Node21</title>
+<title>Node161&#45;&gt;Node21</title>
 <path fill="none" stroke="#191970" d="M473.8825,-729.9913C599.448,-705.8592 888.2541,-650.3543 1004.3445,-628.0432"/>
 <polygon fill="#191970" stroke="#191970" points="472.8469,-726.6262 463.6872,-731.9507 474.1681,-733.5004 472.8469,-726.6262"/>
 </g>
-<!-- Node159&#45;&gt;Node110 -->
+<!-- Node161&#45;&gt;Node110 -->
 <g id="edge103" class="edge">
-<title>Node159&#45;&gt;Node110</title>
+<title>Node161&#45;&gt;Node110</title>
 <path fill="none" stroke="#191970" d="M416.6636,-721.6825C422.924,-682.0149 441.9298,-591.8794 491,-536 505.2165,-519.8108 525.7819,-507.8794 544.3214,-499.6012"/>
 <polygon fill="#191970" stroke="#191970" points="413.1875,-721.2648 415.1934,-731.668 420.1128,-722.2845 413.1875,-721.2648"/>
 </g>
-<!-- Node152&#45;&gt;Node47 -->
+<!-- Node153&#45;&gt;Node47 -->
 <g id="edge114" class="edge">
-<title>Node152&#45;&gt;Node47</title>
+<title>Node153&#45;&gt;Node47</title>
 <path fill="none" stroke="#191970" d="M2034,-325.348C2034,-283.0061 2034,-198.7637 2034,-164.5088"/>
 <polygon fill="#191970" stroke="#191970" points="2030.5001,-325.3923 2034,-335.3923 2037.5001,-325.3924 2030.5001,-325.3923"/>
 </g>
-<!-- Node211&#45;&gt;Node22 -->
+<!-- Node213&#45;&gt;Node22 -->
 <g id="edge122" class="edge">
-<title>Node211&#45;&gt;Node22</title>
+<title>Node213&#45;&gt;Node22</title>
 <path fill="none" stroke="#191970" d="M2169.5498,-415.8208C1923.7259,-409.6247 1097.2521,-387.3001 1045,-366 1027.0209,-358.671 988.4752,-320.4645 967.433,-298.7069"/>
 <polygon fill="#191970" stroke="#191970" points="2169.6944,-419.3254 2179.7791,-416.0777 2169.8702,-412.3276 2169.6944,-419.3254"/>
 </g>
-<!-- Node211&#45;&gt;Node26 -->
+<!-- Node213&#45;&gt;Node26 -->
 <g id="edge135" class="edge">
-<title>Node211&#45;&gt;Node26</title>
+<title>Node213&#45;&gt;Node26</title>
 <path fill="none" stroke="#191970" d="M2169.7428,-415.0482C1983.8761,-408.2821 1453.1196,-388.3616 1012,-366 940.4854,-362.3747 858.0011,-357.3317 805.2216,-353.9948"/>
 <polygon fill="#191970" stroke="#191970" points="2169.7912,-418.5522 2179.9117,-415.4178 2170.0455,-411.5568 2169.7912,-418.5522"/>
 </g>
-<!-- Node211&#45;&gt;Node44 -->
+<!-- Node213&#45;&gt;Node44 -->
 <g id="edge123" class="edge">
-<title>Node211&#45;&gt;Node44</title>
+<title>Node213&#45;&gt;Node44</title>
 <path fill="none" stroke="#191970" d="M2230.502,-392.7514C2223.3323,-373.6336 2210.5248,-348.2693 2190,-335 2125.267,-293.15 1921.6171,-307.4616 1845,-299 1826.6307,-296.9713 1806.844,-294.7078 1788.2918,-292.551"/>
 <polygon fill="#191970" stroke="#191970" points="2227.2473,-394.0484 2233.8335,-402.3474 2233.8601,-391.7526 2227.2473,-394.0484"/>
 </g>
-<!-- Node211&#45;&gt;Node45 -->
+<!-- Node213&#45;&gt;Node45 -->
 <g id="edge127" class="edge">
-<title>Node211&#45;&gt;Node45</title>
+<title>Node213&#45;&gt;Node45</title>
 <path fill="none" stroke="#191970" d="M2264.5997,-395.7509C2291.1413,-371.4389 2328,-329.4113 2328,-283.5 2328,-283.5 2328,-283.5 2328,-216.5 2328,-168.095 2467.1163,-120.8793 2547.8146,-97.5625"/>
 <polygon fill="#191970" stroke="#191970" points="2262.2422,-393.1636 2257.0867,-402.4195 2266.889,-398.3988 2262.2422,-393.1636"/>
 </g>
-<!-- Node211&#45;&gt;Node46 -->
+<!-- Node213&#45;&gt;Node46 -->
 <g id="edge133" class="edge">
-<title>Node211&#45;&gt;Node46</title>
+<title>Node213&#45;&gt;Node46</title>
 <path fill="none" stroke="#191970" d="M2306.1627,-405.031C2313.8487,-403.8808 2321.581,-402.8358 2329,-402 2471.8744,-385.9033 2841.1279,-418.4448 2975,-366 3059.1946,-333.0165 3113,-306.9248 3113,-216.5 3113,-216.5 3113,-216.5 3113,-149.5 3113,-95.1498 2957.054,-52.1213 2859.8923,-30.587"/>
 <polygon fill="#191970" stroke="#191970" points="2305.4583,-401.5984 2296.1157,-406.595 2306.5351,-408.5151 2305.4583,-401.5984"/>
 </g>
-<!-- Node211&#45;&gt;Node47 -->
+<!-- Node213&#45;&gt;Node47 -->
 <g id="edge134" class="edge">
-<title>Node211&#45;&gt;Node47</title>
+<title>Node213&#45;&gt;Node47</title>
 <path fill="none" stroke="#191970" d="M2247.6536,-392.7277C2252.7021,-375.7674 2256.2872,-353.1508 2248,-335 2209.9148,-251.585 2113.7436,-191.2569 2064.3327,-164.6487"/>
 <polygon fill="#191970" stroke="#191970" points="2244.2494,-391.8729 2244.4141,-402.4665 2250.8916,-394.0824 2244.2494,-391.8729"/>
 </g>
-<!-- Node211&#45;&gt;Node48 -->
+<!-- Node213&#45;&gt;Node48 -->
 <g id="edge125" class="edge">
-<title>Node211&#45;&gt;Node48</title>
+<title>Node213&#45;&gt;Node48</title>
 <path fill="none" stroke="#191970" d="M2242.1597,-392.5067C2243.6857,-374.2049 2242.2935,-350.0342 2228,-335 2198.3357,-303.7983 2084.3795,-291.4382 2006.3046,-286.584"/>
 <polygon fill="#191970" stroke="#191970" points="2238.6772,-392.1536 2240.9994,-402.4908 2245.6304,-392.9617 2238.6772,-392.1536"/>
 </g>
-<!-- Node211&#45;&gt;Node49 -->
+<!-- Node213&#45;&gt;Node49 -->
 <g id="edge131" class="edge">
-<title>Node211&#45;&gt;Node49</title>
+<title>Node213&#45;&gt;Node49</title>
 <path fill="none" stroke="#191970" d="M2286.8235,-398.3127C2305.1042,-389.8847 2325.4374,-378.9393 2342,-366 2393.7954,-325.5356 2439.6766,-261.1119 2459.1219,-231.854"/>
 <polygon fill="#191970" stroke="#191970" points="2285.1833,-395.2121 2277.4817,-402.4878 2288.0396,-401.6028 2285.1833,-395.2121"/>
 </g>
-<!-- Node211&#45;&gt;Node51 -->
+<!-- Node213&#45;&gt;Node51 -->
 <g id="edge128" class="edge">
-<title>Node211&#45;&gt;Node51</title>
+<title>Node213&#45;&gt;Node51</title>
 <path fill="none" stroke="#191970" d="M2215.8404,-394.6988C2193.0375,-371.4912 2159.777,-338.4409 2152,-335 2045.0588,-287.6852 1743.3919,-310.315 1627,-299 1611.7903,-297.5214 1595.5562,-295.6707 1580.0357,-293.7681"/>
 <polygon fill="#191970" stroke="#191970" points="2213.624,-397.4379 2223.1208,-402.1349 2218.6258,-392.5408 2213.624,-397.4379"/>
 </g>
-<!-- Node211&#45;&gt;Node55 -->
+<!-- Node213&#45;&gt;Node55 -->
 <g id="edge136" class="edge">
-<title>Node211&#45;&gt;Node55</title>
+<title>Node213&#45;&gt;Node55</title>
 <path fill="none" stroke="#191970" d="M2200.6858,-397.4918C2185.1411,-388.5687 2167.2076,-377.4857 2152,-366 2135.9422,-353.8721 2137.5128,-342.8876 2119,-335 1975.7031,-273.9465 1573.0969,-321.7085 1419,-299 1415.9913,-298.5566 1412.9165,-298.0344 1409.8229,-297.4548"/>
 <polygon fill="#191970" stroke="#191970" points="2199.095,-400.6129 2209.5235,-402.4827 2202.5371,-394.5177 2199.095,-400.6129"/>
 </g>
-<!-- Node211&#45;&gt;Node143 -->
+<!-- Node213&#45;&gt;Node144 -->
 <g id="edge124" class="edge">
-<title>Node211&#45;&gt;Node143</title>
+<title>Node213&#45;&gt;Node144</title>
 <path fill="none" stroke="#191970" d="M2289.7181,-398.9203C2364.2566,-372.1424 2500.4499,-323.215 2569.0909,-298.5558"/>
 <polygon fill="#191970" stroke="#191970" points="2288.2906,-395.7141 2280.0628,-402.389 2290.6573,-402.3019 2288.2906,-395.7141"/>
 </g>
-<!-- Node211&#45;&gt;Node145 -->
+<!-- Node213&#45;&gt;Node146 -->
 <g id="edge126" class="edge">
-<title>Node211&#45;&gt;Node145</title>
+<title>Node213&#45;&gt;Node146</title>
 <path fill="none" stroke="#191970" d="M2306.1644,-405.0456C2313.85,-403.8927 2321.5818,-402.8431 2329,-402 2398.0044,-394.1573 2900.1884,-411.1004 2953,-366 2972.286,-349.53 2973.726,-317.6441 2972.6325,-298.6994"/>
 <polygon fill="#191970" stroke="#191970" points="2305.459,-401.6132 2296.1177,-406.6121 2306.5375,-408.5296 2305.459,-401.6132"/>
 </g>
-<!-- Node211&#45;&gt;Node146 -->
+<!-- Node213&#45;&gt;Node147 -->
 <g id="edge129" class="edge">
-<title>Node211&#45;&gt;Node146</title>
+<title>Node213&#45;&gt;Node147</title>
 <path fill="none" stroke="#191970" d="M2306.1902,-405.2591C2313.8711,-404.0666 2321.5947,-402.9495 2329,-402 2513.0731,-378.3995 2560.4586,-385.605 2745,-366 2760.1959,-364.3856 2776.4239,-362.4745 2791.9426,-360.5554"/>
 <polygon fill="#191970" stroke="#191970" points="2305.4711,-401.8295 2296.1479,-406.8622 2306.5746,-408.7419 2305.4711,-401.8295"/>
 </g>
-<!-- Node211&#45;&gt;Node147 -->
+<!-- Node213&#45;&gt;Node148 -->
 <g id="edge132" class="edge">
-<title>Node211&#45;&gt;Node147</title>
+<title>Node213&#45;&gt;Node148</title>
 <path fill="none" stroke="#191970" d="M2169.7334,-406.3984C2095.971,-394.4031 1978.7282,-375.3369 1902.0623,-362.8694"/>
 <polygon fill="#191970" stroke="#191970" points="2169.4121,-409.892 2179.8443,-408.0426 2170.5358,-402.9828 2169.4121,-409.892"/>
 </g>
-<!-- Node211&#45;&gt;Node152 -->
+<!-- Node213&#45;&gt;Node153 -->
 <g id="edge130" class="edge">
-<title>Node211&#45;&gt;Node152</title>
+<title>Node213&#45;&gt;Node153</title>
 <path fill="none" stroke="#191970" d="M2182.5117,-399.2759C2150.3608,-388.7165 2110.3645,-375.5805 2079.9813,-365.6017"/>
 <polygon fill="#191970" stroke="#191970" points="2181.6256,-402.6687 2192.2184,-402.4639 2183.8098,-396.0182 2181.6256,-402.6687"/>
 </g>
-<!-- Node188&#45;&gt;Node157 -->
+<!-- Node190&#45;&gt;Node159 -->
 <g id="edge142" class="edge">
-<title>Node188&#45;&gt;Node157</title>
+<title>Node190&#45;&gt;Node159</title>
 <path fill="none" stroke="#191970" d="M1556.11,-598.624C1554.7996,-569.3572 1552.4011,-515.7914 1551.4276,-494.0496"/>
 <polygon fill="#191970" stroke="#191970" points="1552.6246,-599.0297 1556.5685,-608.8631 1559.6176,-598.7165 1552.6246,-599.0297"/>
 </g>
-<!-- Node188&#45;&gt;Node189 -->
+<!-- Node190&#45;&gt;Node191 -->
 <g id="edge138" class="edge">
-<title>Node188&#45;&gt;Node189</title>
+<title>Node190&#45;&gt;Node191</title>
 <path fill="none" stroke="#191970" d="M1535.7304,-603.0102C1517.9577,-590.067 1493.1761,-572.0195 1478.1569,-561.0817"/>
 <polygon fill="#191970" stroke="#191970" points="1533.6746,-605.8428 1543.8186,-608.9005 1537.7955,-600.1843 1533.6746,-605.8428"/>
 </g>
-<!-- Node189&#45;&gt;Node55 -->
+<!-- Node191&#45;&gt;Node55 -->
 <g id="edge139" class="edge">
-<title>Node189&#45;&gt;Node55</title>
+<title>Node191&#45;&gt;Node55</title>
 <path fill="none" stroke="#191970" d="M1461.3852,-531.8099C1459.2082,-513.7779 1458.981,-487.2025 1472,-469 1495.1547,-436.6263 1532.2357,-466.3244 1554,-433 1561.5339,-421.4645 1560.795,-413.9856 1554,-402 1523.0796,-347.4596 1455.9672,-315.3173 1408.4689,-298.5709"/>
 <polygon fill="#191970" stroke="#191970" points="1457.9371,-532.4172 1462.8916,-541.7822 1464.8586,-531.3715 1457.9371,-532.4172"/>
 </g>
-<!-- Node189&#45;&gt;Node94 -->
+<!-- Node191&#45;&gt;Node94 -->
 <g id="edge140" class="edge">
-<title>Node189&#45;&gt;Node94</title>
+<title>Node191&#45;&gt;Node94</title>
 <path fill="none" stroke="#191970" d="M1447.1314,-534.6381C1416.9843,-506.1894 1356.9751,-449.561 1333.1198,-427.0496"/>
 <polygon fill="#191970" stroke="#191970" points="1445.1126,-537.5453 1454.7877,-541.8631 1449.9169,-532.4542 1445.1126,-537.5453"/>
 </g>
-<!-- Node189&#45;&gt;Node157 -->
+<!-- Node191&#45;&gt;Node159 -->
 <g id="edge141" class="edge">
-<title>Node189&#45;&gt;Node157</title>
+<title>Node191&#45;&gt;Node159</title>
 <path fill="none" stroke="#191970" d="M1485.3372,-535.6559C1501.9165,-522.7395 1524.7891,-504.9201 1538.7012,-494.0817"/>
 <polygon fill="#191970" stroke="#191970" points="1483.0593,-532.9937 1477.3217,-541.9005 1487.3613,-538.5158 1483.0593,-532.9937"/>
 </g>
diff --git a/docs/reference/api/doxygen/attr__registry__map_8h.html b/docs/reference/api/doxygen/attr__registry__map_8h.html
index b62d053d9..9c33e55f9 100644
--- a/docs/reference/api/doxygen/attr__registry__map_8h.html
+++ b/docs/reference/api/doxygen/attr__registry__map_8h.html
@@ -84,7 +84,7 @@ Include dependency graph for attr_registry_map.h:</div>
 </div><div class="textblock"><div class="dynheader">
 This graph shows which files directly or indirectly include this file:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="attr__registry__map_8h__dep__incl.svg" width="3619" height="678"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="attr__registry__map_8h__dep__incl.svg" width="3804" height="588"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div>
diff --git a/docs/reference/api/doxygen/attr__registry__map_8h__dep__incl.svg b/docs/reference/api/doxygen/attr__registry__map_8h__dep__incl.svg
index c0fd6c4d4..6ebd733f1 100644
--- a/docs/reference/api/doxygen/attr__registry__map_8h__dep__incl.svg
+++ b/docs/reference/api/doxygen/attr__registry__map_8h__dep__incl.svg
@@ -4,987 +4,981 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: include/tvm/node/attr_registry_map.h Pages: 1 -->
-<svg width="2714pt" height="508pt"
- viewBox="0.00 0.00 2713.50 508.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
-<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 504)">
+<svg width="2853pt" height="441pt"
+ viewBox="0.00 0.00 2853.00 441.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 437)">
 <title>include/tvm/node/attr_registry_map.h</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-504 2709.5,-504 2709.5,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-437 2849,-437 2849,4 -4,4"/>
 <!-- Node24 -->
 <g id="node1" class="node">
 <title>Node24</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="1410,-469.5 1410,-499.5 1532,-499.5 1532,-469.5 1410,-469.5"/>
-<text text-anchor="start" x="1418" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/attr</text>
-<text text-anchor="middle" x="1471" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_registry_map.h</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="1778.5,-402.5 1778.5,-432.5 1900.5,-432.5 1900.5,-402.5 1778.5,-402.5"/>
+<text text-anchor="start" x="1786.5" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/attr</text>
+<text text-anchor="middle" x="1839.5" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_registry_map.h</text>
 </g>
 <!-- Node25 -->
 <g id="node2" class="node">
 <title>Node25</title>
 <g id="a_node2"><a xlink:href="ir_2op_8h.html" target="_top" xlink:title="Primitive operators(builtin intrinsics) and registry for them. ">
-<polygon fill="#ffffff" stroke="#000000" points="801,-341 801,-360 909,-360 909,-341 801,-341"/>
-<text text-anchor="middle" x="855" y="-348" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/op.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1059.5,-274 1059.5,-293 1167.5,-293 1167.5,-274 1059.5,-274"/>
+<text text-anchor="middle" x="1113.5" y="-281" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/op.h</text>
 </a>
 </g>
 </g>
 <!-- Node24&#45;&gt;Node25 -->
 <g id="edge1" class="edge">
 <title>Node24&#45;&gt;Node25</title>
-<path fill="none" stroke="#191970" d="M1399.8413,-469.0207C1271.383,-441.0768 1004.3307,-382.9843 899.0424,-360.0807"/>
-<polygon fill="#191970" stroke="#191970" points="1399.4514,-472.5176 1409.9669,-471.2233 1400.9394,-465.6776 1399.4514,-472.5176"/>
-</g>
-<!-- Node111 -->
-<g id="node33" class="node">
-<title>Node111</title>
-<g id="a_node33"><a xlink:href="executor_8h.html" target="_top" xlink:title="Object representation of Executor configuration and registry. ">
-<polygon fill="#ffffff" stroke="#000000" points="1499,-408 1499,-427 1655,-427 1655,-408 1499,-408"/>
-<text text-anchor="middle" x="1577" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/executor.h</text>
-</a>
-</g>
-</g>
-<!-- Node24&#45;&gt;Node111 -->
-<g id="edge54" class="edge">
-<title>Node24&#45;&gt;Node111</title>
-<path fill="none" stroke="#191970" d="M1503.3625,-464.0445C1522.7974,-451.7601 1546.6199,-436.7025 1561.8057,-427.1039"/>
-<polygon fill="#191970" stroke="#191970" points="1501.3714,-461.1624 1494.7885,-469.4639 1505.1115,-467.0795 1501.3714,-461.1624"/>
+<path fill="none" stroke="#191970" d="M1768.0451,-404.3114C1622.155,-377.384 1293.0628,-316.6424 1165.4458,-293.0878"/>
+<polygon fill="#191970" stroke="#191970" points="1767.7581,-407.8174 1778.2273,-406.1907 1769.0287,-400.9337 1767.7581,-407.8174"/>
 </g>
 <!-- Node112 -->
-<g id="node34" class="node">
+<g id="node33" class="node">
 <title>Node112</title>
-<g id="a_node34"><a xlink:href="runtime_8h.html" target="_top" xlink:title="Object representation of Runtime configuration and registry. ">
-<polygon fill="#ffffff" stroke="#000000" points="1673,-408 1673,-427 1825,-427 1825,-408 1673,-408"/>
-<text text-anchor="middle" x="1749" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/runtime.h</text>
+<g id="a_node33"><a xlink:href="executor_8h.html" target="_top" xlink:title="Object representation of Executor configuration and registry. ">
+<polygon fill="#ffffff" stroke="#000000" points="1867.5,-341 1867.5,-360 2023.5,-360 2023.5,-341 1867.5,-341"/>
+<text text-anchor="middle" x="1945.5" y="-348" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/executor.h</text>
 </a>
 </g>
 </g>
 <!-- Node24&#45;&gt;Node112 -->
-<g id="edge55" class="edge">
+<g id="edge53" class="edge">
 <title>Node24&#45;&gt;Node112</title>
-<path fill="none" stroke="#191970" d="M1541.733,-467.4528C1594.9792,-454.6201 1666.0049,-437.5024 1709.5682,-427.0033"/>
-<polygon fill="#191970" stroke="#191970" points="1540.9099,-464.0509 1532.0083,-469.7966 1542.55,-470.8561 1540.9099,-464.0509"/>
+<path fill="none" stroke="#191970" d="M1871.8625,-397.0445C1891.2974,-384.7601 1915.1199,-369.7025 1930.3057,-360.1039"/>
+<polygon fill="#191970" stroke="#191970" points="1869.8714,-394.1624 1863.2885,-402.4639 1873.6115,-400.0795 1869.8714,-394.1624"/>
 </g>
 <!-- Node113 -->
-<g id="node35" class="node">
+<g id="node34" class="node">
 <title>Node113</title>
-<g id="a_node35"><a xlink:href="tag_8h.html" target="_top" xlink:title="Target tag registry. ">
-<polygon fill="#ffffff" stroke="#000000" points="2503.5,-268.5 2503.5,-298.5 2610.5,-298.5 2610.5,-268.5 2503.5,-268.5"/>
-<text text-anchor="start" x="2511.5" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="2557" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/tag.h</text>
+<g id="a_node34"><a xlink:href="runtime_8h.html" target="_top" xlink:title="Object representation of Runtime configuration and registry. ">
+<polygon fill="#ffffff" stroke="#000000" points="2041.5,-341 2041.5,-360 2193.5,-360 2193.5,-341 2041.5,-341"/>
+<text text-anchor="middle" x="2117.5" y="-348" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/runtime.h</text>
 </a>
 </g>
 </g>
 <!-- Node24&#45;&gt;Node113 -->
-<g id="edge56" class="edge">
+<g id="edge54" class="edge">
 <title>Node24&#45;&gt;Node113</title>
-<path fill="none" stroke="#191970" d="M1469.6118,-459.2274C1470.0719,-440.5702 1474.0668,-416.0689 1490,-402 1658.1223,-253.5501 2273.0172,-337.3283 2494,-299 2497.1074,-298.461 2500.2888,-297.8545 2503.4907,-297.2017"/>
-<polygon fill="#191970" stroke="#191970" points="1466.1132,-459.4399 1469.6859,-469.4141 1473.113,-459.3889 1466.1132,-459.4399"/>
+<path fill="none" stroke="#191970" d="M1910.233,-400.4528C1963.4792,-387.6201 2034.5049,-370.5024 2078.0682,-360.0033"/>
+<polygon fill="#191970" stroke="#191970" points="1909.4099,-397.0509 1900.5083,-402.7966 1911.05,-403.8561 1909.4099,-397.0509"/>
 </g>
 <!-- Node114 -->
-<g id="node36" class="node">
+<g id="node35" class="node">
 <title>Node114</title>
-<g id="a_node36"><a xlink:href="target__kind_8h.html" target="_top" xlink:title="Target kind registry. ">
-<polygon fill="#ffffff" stroke="#000000" points="1335.5,-402.5 1335.5,-432.5 1442.5,-432.5 1442.5,-402.5 1335.5,-402.5"/>
-<text text-anchor="start" x="1343.5" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="1389" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target_kind.h</text>
+<g id="a_node35"><a xlink:href="tag_8h.html" target="_top" xlink:title="Target tag registry. ">
+<polygon fill="#ffffff" stroke="#000000" points="2738,-201.5 2738,-231.5 2845,-231.5 2845,-201.5 2738,-201.5"/>
+<text text-anchor="start" x="2746" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="2791.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/tag.h</text>
 </a>
 </g>
 </g>
 <!-- Node24&#45;&gt;Node114 -->
-<g id="edge57" class="edge">
+<g id="edge55" class="edge">
 <title>Node24&#45;&gt;Node114</title>
-<path fill="none" stroke="#191970" d="M1444.4183,-462.7808C1432.3722,-452.9383 1418.4233,-441.541 1407.5334,-432.6432"/>
-<polygon fill="#191970" stroke="#191970" points="1442.4346,-465.6797 1452.3929,-469.2967 1446.8637,-460.2591 1442.4346,-465.6797"/>
+<path fill="none" stroke="#191970" d="M1838.1745,-392.2978C1838.6678,-373.678 1842.677,-349.1928 1858.5,-335 2003.4249,-205.006 2536.9459,-266.7663 2728.5,-232 2731.6031,-231.4368 2734.7811,-230.8114 2737.9805,-230.1441"/>
+<polygon fill="#191970" stroke="#191970" points="1834.6753,-392.4793 1838.2275,-402.4609 1841.6752,-392.4427 1834.6753,-392.4793"/>
+</g>
+<!-- Node115 -->
+<g id="node36" class="node">
+<title>Node115</title>
+<g id="a_node36"><a xlink:href="target__kind_8h.html" target="_top" xlink:title="Target kind registry. ">
+<polygon fill="#ffffff" stroke="#000000" points="1704,-335.5 1704,-365.5 1811,-365.5 1811,-335.5 1704,-335.5"/>
+<text text-anchor="start" x="1712" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="1757.5" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target_kind.h</text>
+</a>
+</g>
+</g>
+<!-- Node24&#45;&gt;Node115 -->
+<g id="edge56" class="edge">
+<title>Node24&#45;&gt;Node115</title>
+<path fill="none" stroke="#191970" d="M1812.9183,-395.7808C1800.8722,-385.9383 1786.9233,-374.541 1776.0334,-365.6432"/>
+<polygon fill="#191970" stroke="#191970" points="1810.9346,-398.6797 1820.8929,-402.2967 1815.3637,-393.2591 1810.9346,-398.6797"/>
 </g>
 <!-- Node26 -->
 <g id="node3" class="node">
 <title>Node26</title>
 <g id="a_node3"><a xlink:href="relay_2expr_8h.html" target="_top" xlink:title="Relay expression language. ">
-<polygon fill="#ffffff" stroke="#000000" points="1640.5,-207 1640.5,-226 1775.5,-226 1775.5,-207 1640.5,-207"/>
-<text text-anchor="middle" x="1708" y="-214" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/expr.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1574,-140 1574,-159 1709,-159 1709,-140 1574,-140"/>
+<text text-anchor="middle" x="1641.5" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node25&#45;&gt;Node26 -->
 <g id="edge2" class="edge">
 <title>Node25&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M868.8977,-332.9628C885.7218,-313.201 916.0536,-282.012 950,-268 957.944,-264.721 1455.7551,-232.6215 1640.2161,-220.8224"/>
-<polygon fill="#191970" stroke="#191970" points="866.0178,-330.9535 862.3482,-340.8925 871.4149,-335.4113 866.0178,-330.9535"/>
+<path fill="none" stroke="#191970" d="M1168.6031,-271.7018C1209.7732,-262.3624 1267.2034,-248.2201 1316.5,-232 1351.1521,-220.5984 1357.6028,-211.6278 1392.5,-201 1455.2103,-181.9018 1528.953,-167.6312 1579.8274,-159.0138"/>
+<polygon fill="#191970" stroke="#191970" points="1167.7541,-268.3052 1158.7647,-273.9124 1169.2888,-275.1349 1167.7541,-268.3052"/>
 </g>
 <!-- Node43 -->
-<g id="node19" class="node">
+<g id="node18" class="node">
 <title>Node43</title>
-<g id="a_node19"><a xlink:href="relay_2op_8h.html" target="_top" xlink:title="Primitive operators(builtin intrinsics). ">
-<polygon fill="#ffffff" stroke="#000000" points="1611,-140 1611,-159 1737,-159 1737,-140 1611,-140"/>
-<text text-anchor="middle" x="1674" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/op.h</text>
+<g id="a_node18"><a xlink:href="relay_2op_8h.html" target="_top" xlink:title="Primitive operators(builtin intrinsics). ">
+<polygon fill="#ffffff" stroke="#000000" points="1050.5,-73 1050.5,-92 1176.5,-92 1176.5,-73 1050.5,-73"/>
+<text text-anchor="middle" x="1113.5" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/op.h</text>
 </a>
 </g>
 </g>
 <!-- Node25&#45;&gt;Node43 -->
-<g id="edge35" class="edge">
+<g id="edge33" class="edge">
 <title>Node25&#45;&gt;Node43</title>
-<path fill="none" stroke="#191970" d="M860.4797,-330.9525C866.9214,-311.7655 879.5516,-283.2297 901,-268 1022.5705,-181.6772 1416.239,-184.9427 1564,-165 1579.1945,-162.9493 1595.5601,-160.6774 1610.9042,-158.5196"/>
-<polygon fill="#191970" stroke="#191970" points="857.06,-330.1646 857.4514,-340.7522 863.7479,-332.2313 857.06,-330.1646"/>
+<path fill="none" stroke="#191970" d="M1113.5,-263.6373C1113.5,-221.2215 1113.5,-123.1332 1113.5,-92.0974"/>
+<polygon fill="#191970" stroke="#191970" points="1110.0001,-263.8053 1113.5,-273.8054 1117.0001,-263.8054 1110.0001,-263.8053"/>
 </g>
 <!-- Node46 -->
-<g id="node22" class="node">
+<g id="node21" class="node">
 <title>Node46</title>
-<g id="a_node22"><a xlink:href="builtin_8h.html" target="_top" xlink:title="TIR builtin intrinsics. ">
-<polygon fill="#ffffff" stroke="#000000" points="406.5,-207 406.5,-226 537.5,-226 537.5,-207 406.5,-207"/>
-<text text-anchor="middle" x="472" y="-214" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/builtin.h</text>
+<g id="a_node21"><a xlink:href="builtin_8h.html" target="_top" xlink:title="TIR builtin intrinsics. ">
+<polygon fill="#ffffff" stroke="#000000" points="62,-140 62,-159 193,-159 193,-140 62,-140"/>
+<text text-anchor="middle" x="127.5" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/builtin.h</text>
 </a>
 </g>
 </g>
 <!-- Node25&#45;&gt;Node46 -->
-<g id="edge36" class="edge">
+<g id="edge34" class="edge">
 <title>Node25&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M791.0169,-342.4528C741.5146,-334.8992 671.879,-321.3443 614,-299 563.4633,-279.4903 509.7224,-243.4958 485.3713,-226.2329"/>
-<polygon fill="#191970" stroke="#191970" points="790.545,-345.921 800.951,-343.9294 791.5743,-338.9971 790.545,-345.921"/>
+<path fill="none" stroke="#191970" d="M1048.7828,-282.2874C883.9612,-278.7278 451.5898,-266.3092 312.5,-232 246.6029,-215.7452 174.917,-177.1087 143.6249,-159.0767"/>
+<polygon fill="#191970" stroke="#191970" points="1049.1745,-285.7964 1059.2464,-282.5092 1049.3229,-278.798 1049.1745,-285.7964"/>
 </g>
 <!-- Node59 -->
-<g id="node26" class="node">
+<g id="node25" class="node">
 <title>Node59</title>
-<g id="a_node26"><a xlink:href="tir_2op_8h.html" target="_top" xlink:title="Common operators defined for Expr. ">
-<polygon fill="#ffffff" stroke="#000000" points="175,-274 175,-293 287,-293 287,-274 175,-274"/>
-<text text-anchor="middle" x="231" y="-281" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/op.h</text>
+<g id="a_node25"><a xlink:href="tir_2op_8h.html" target="_top" xlink:title="Common operators defined for Expr. ">
+<polygon fill="#ffffff" stroke="#000000" points="615.5,-207 615.5,-226 727.5,-226 727.5,-207 615.5,-207"/>
+<text text-anchor="middle" x="671.5" y="-214" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/op.h</text>
 </a>
 </g>
 </g>
 <!-- Node25&#45;&gt;Node59 -->
-<g id="edge40" class="edge">
+<g id="edge38" class="edge">
 <title>Node25&#45;&gt;Node59</title>
-<path fill="none" stroke="#191970" d="M791.0049,-343.6287C668.5431,-330.4798 402.9333,-301.9608 287.0097,-289.5139"/>
-<polygon fill="#191970" stroke="#191970" points="790.6706,-347.1129 800.9871,-344.7005 791.418,-340.1529 790.6706,-347.1129"/>
+<path fill="none" stroke="#191970" d="M1049.3332,-273.7734C962.709,-260.6425 809.7613,-237.4582 727.5356,-224.9941"/>
+<polygon fill="#191970" stroke="#191970" points="1048.8905,-277.2462 1059.3021,-275.2845 1049.9396,-270.3252 1048.8905,-277.2462"/>
 </g>
 <!-- Node27 -->
 <g id="node4" class="node">
 <title>Node27</title>
 <g id="a_node4"><a xlink:href="type__functor_8h.html" target="_top" xlink:title="A way to defined arbitrary function signature with dispatch on types. ">
-<polygon fill="#ffffff" stroke="#000000" points="2320.5,-67.5 2320.5,-97.5 2429.5,-97.5 2429.5,-67.5 2320.5,-67.5"/>
-<text text-anchor="start" x="2328.5" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type</text>
-<text text-anchor="middle" x="2375" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_functor.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1600,-.5 1600,-30.5 1709,-30.5 1709,-.5 1600,-.5"/>
+<text text-anchor="start" x="1608" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type</text>
+<text text-anchor="middle" x="1654.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_functor.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node27 -->
 <g id="edge3" class="edge">
 <title>Node26&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M1785.5663,-206.9057C1803.7381,-204.8119 1823.0363,-202.7127 1841,-201 1948.5864,-190.7425 2234.1173,-221.9001 2326,-165 2350.6435,-149.739 2364.424,-117.1105 2370.7238,-97.7736"/>
-<polygon fill="#191970" stroke="#191970" points="1785.1304,-203.4327 1775.6023,-208.0658 1785.9401,-210.3857 1785.1304,-203.4327"/>
+<path fill="none" stroke="#191970" d="M1696.5887,-136.5833C1716.8069,-128.8886 1737.6333,-116.8054 1749.5,-98 1756.8526,-86.3481 1756.5484,-78.8384 1749.5,-67 1739.5186,-50.2353 1721.9684,-38.5925 1704.6463,-30.6767"/>
+<polygon fill="#191970" stroke="#191970" points="1695.3475,-133.3094 1687.0665,-139.9182 1697.6614,-139.916 1695.3475,-133.3094"/>
 </g>
 <!-- Node28 -->
 <g id="node5" class="node">
 <title>Node28</title>
 <g id="a_node5"><a xlink:href="relay_2adt_8h.html" target="_top" xlink:title="Algebraic data types for Relay. ">
-<polygon fill="#ffffff" stroke="#000000" points="2170,-140 2170,-159 2300,-159 2300,-140 2170,-140"/>
-<text text-anchor="middle" x="2235" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/adt.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1232.5,-73 1232.5,-92 1362.5,-92 1362.5,-73 1232.5,-73"/>
+<text text-anchor="middle" x="1297.5" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/adt.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node28 -->
 <g id="edge4" class="edge">
 <title>Node26&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M1785.6183,-207.4015C1803.7879,-205.2863 1823.072,-203.0531 1841,-201 1983.19,-184.7164 2019.6916,-187.6958 2161,-165 2171.3679,-163.3348 2182.4825,-161.1967 2192.8645,-159.0438"/>
-<polygon fill="#191970" stroke="#191970" points="1785.1821,-203.9285 1775.6545,-208.5626 1785.9924,-210.8815 1785.1821,-203.9285"/>
+<path fill="none" stroke="#191970" d="M1582.1347,-137.9946C1528.4241,-127.5775 1447.1546,-111.7968 1376.5,-98 1366.6233,-96.0714 1356.0532,-94.0015 1345.961,-92.0224"/>
+<polygon fill="#191970" stroke="#191970" points="1581.6824,-141.472 1592.1659,-139.9399 1583.0151,-134.6 1581.6824,-141.472"/>
 </g>
 <!-- Node29 -->
 <g id="node6" class="node">
 <title>Node29</title>
 <g id="a_node6"><a xlink:href="relay_2analysis_8h.html" target="_top" xlink:title="The set of Relay analysis passes written in C++. ">
-<polygon fill="#ffffff" stroke="#000000" points="2147.5,-73 2147.5,-92 2302.5,-92 2302.5,-73 2147.5,-73"/>
-<text text-anchor="middle" x="2225" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/analysis.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1330,-6 1330,-25 1485,-25 1485,-6 1330,-6"/>
+<text text-anchor="middle" x="1407.5" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/analysis.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node29 -->
 <g id="edge9" class="edge">
 <title>Node26&#45;&gt;Node29</title>
-<path fill="none" stroke="#191970" d="M1785.5696,-206.9407C1803.7413,-204.8454 1823.0386,-202.7368 1841,-201 1892.9115,-195.9804 2273.5642,-203.2663 2309,-165 2334.4157,-137.5541 2280.5708,-107.1896 2247.7957,-92.1041"/>
-<polygon fill="#191970" stroke="#191970" points="1785.1338,-203.4677 1775.6057,-208.101 1785.9435,-210.4208 1785.1338,-203.4677"/>
+<path fill="none" stroke="#191970" d="M1570.7192,-138.278C1506.3655,-127.3637 1419.8863,-110.628 1409.5,-98 1392.2993,-77.0867 1399.4064,-42.0858 1404.3047,-25.1975"/>
+<polygon fill="#191970" stroke="#191970" points="1570.4009,-141.7735 1580.8425,-139.9782 1571.5604,-134.8702 1570.4009,-141.7735"/>
 </g>
 <!-- Node30 -->
 <g id="node7" class="node">
 <title>Node30</title>
 <g id="a_node7"><a xlink:href="relay_2expr__functor_8h.html" target="_top" xlink:title="A more powerful visitor which enables defining arbitrary function signatures with type based dispatch...">
-<polygon fill="#ffffff" stroke="#000000" points="2002.5,-67.5 2002.5,-97.5 2129.5,-97.5 2129.5,-67.5 2002.5,-67.5"/>
-<text text-anchor="start" x="2010.5" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/expr</text>
-<text text-anchor="middle" x="2066" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_functor.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1185,-.5 1185,-30.5 1312,-30.5 1312,-.5 1185,-.5"/>
+<text text-anchor="start" x="1193" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/expr</text>
+<text text-anchor="middle" x="1248.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_functor.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node30 -->
-<g id="edge18" class="edge">
+<g id="edge16" class="edge">
 <title>Node26&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M1785.006,-205.1935C1829.1199,-197.1549 1884.8992,-184.3017 1932,-165 1954.7513,-155.6766 2011.7561,-118.6775 2043.5976,-97.531"/>
-<polygon fill="#191970" stroke="#191970" points="1784.153,-201.7901 1774.9192,-206.9851 1785.3773,-208.6823 1784.153,-201.7901"/>
+<path fill="none" stroke="#191970" d="M1572.8876,-138.4844C1562.3847,-136.9084 1551.6606,-135.362 1541.5,-134 1471.0124,-124.5514 1270.8753,-151.0411 1223.5,-98 1206.5516,-79.0246 1223.1373,-48.8702 1236.1935,-30.7516"/>
+<polygon fill="#191970" stroke="#191970" points="1572.4518,-141.9582 1582.864,-139.9993 1573.5028,-135.0376 1572.4518,-141.9582"/>
 </g>
 <!-- Node31 -->
 <g id="node8" class="node">
 <title>Node31</title>
 <g id="a_node8"><a xlink:href="pattern__functor_8h.html" target="_top" xlink:title="A more powerful visitor on ADT patterns that enables defining arbitrary function signatures with type...">
-<polygon fill="#ffffff" stroke="#000000" points="1844,-67.5 1844,-97.5 1984,-97.5 1984,-67.5 1844,-67.5"/>
-<text text-anchor="start" x="1852" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/pattern</text>
-<text text-anchor="middle" x="1914" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_functor.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1026.5,-.5 1026.5,-30.5 1166.5,-30.5 1166.5,-.5 1026.5,-.5"/>
+<text text-anchor="start" x="1034.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/pattern</text>
+<text text-anchor="middle" x="1096.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_functor.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node31 -->
-<g id="edge33" class="edge">
+<g id="edge31" class="edge">
 <title>Node26&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1785.8337,-207.0368C1837.2594,-199.0006 1898.0217,-185.4647 1914,-165 1928.9846,-145.808 1924.0678,-115.7246 1919.0732,-97.6782"/>
-<polygon fill="#191970" stroke="#191970" points="1785.0532,-203.615 1775.6891,-208.5712 1786.1001,-210.5363 1785.0532,-203.615"/>
+<path fill="none" stroke="#191970" d="M1575.1165,-138.4313C1563.9165,-136.7877 1552.3927,-135.2342 1541.5,-134 1486.1548,-127.729 1079.4448,-138.7751 1041.5,-98 1021.7988,-76.8293 1051.6419,-47.9153 1074.4871,-30.5386"/>
+<polygon fill="#191970" stroke="#191970" points="1574.8747,-141.9342 1585.2839,-139.9597 1575.9152,-135.012 1574.8747,-141.9342"/>
 </g>
 <!-- Node32 -->
 <g id="node9" class="node">
 <title>Node32</title>
 <g id="a_node9"><a xlink:href="algorithm_8h.html" target="_top" xlink:title="include/tvm/relay/attrs\l/algorithm.h">
-<polygon fill="#ffffff" stroke="#000000" points="1281,-134.5 1281,-164.5 1409,-164.5 1409,-134.5 1281,-134.5"/>
-<text text-anchor="start" x="1289" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/attrs</text>
-<text text-anchor="middle" x="1345" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/algorithm.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1795.5,-67.5 1795.5,-97.5 1923.5,-97.5 1923.5,-67.5 1795.5,-67.5"/>
+<text text-anchor="start" x="1803.5" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/attrs</text>
+<text text-anchor="middle" x="1859.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/algorithm.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node32 -->
 <g id="edge10" class="edge">
 <title>Node26&#45;&gt;Node32</title>
-<path fill="none" stroke="#191970" d="M1641.3024,-205.3078C1582.4477,-195.2763 1494.3877,-179.888 1418,-165 1415.0751,-164.4299 1412.0866,-163.8372 1409.0694,-163.2304"/>
-<polygon fill="#191970" stroke="#191970" points="1640.7704,-208.7675 1651.2157,-206.9939 1641.9442,-201.8666 1640.7704,-208.7675"/>
+<path fill="none" stroke="#191970" d="M1682.5199,-136.893C1718.8877,-125.7157 1772.0608,-109.3735 1810.6232,-97.5218"/>
+<polygon fill="#191970" stroke="#191970" points="1681.2646,-133.6171 1672.7341,-139.9005 1683.3211,-140.3082 1681.2646,-133.6171"/>
 </g>
 <!-- Node33 -->
 <g id="node10" class="node">
 <title>Node33</title>
 <g id="a_node10"><a xlink:href="relay_2attrs_2memory_8h.html" target="_top" xlink:title="Attributes for memory operators. ">
-<polygon fill="#ffffff" stroke="#000000" points="2394,-134.5 2394,-164.5 2522,-164.5 2522,-134.5 2394,-134.5"/>
-<text text-anchor="start" x="2402" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/attrs</text>
-<text text-anchor="middle" x="2458" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/memory.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2291.5,-67.5 2291.5,-97.5 2419.5,-97.5 2419.5,-67.5 2291.5,-67.5"/>
+<text text-anchor="start" x="2299.5" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/attrs</text>
+<text text-anchor="middle" x="2355.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/memory.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node33 -->
 <g id="edge11" class="edge">
 <title>Node26&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M1785.5572,-206.8088C1803.7294,-204.7192 1823.0301,-202.6462 1841,-201 2080.0881,-179.0972 2141.9813,-196.4634 2380,-165 2384.5168,-164.4029 2389.1653,-163.7022 2393.8364,-162.9345"/>
-<polygon fill="#191970" stroke="#191970" points="1785.1214,-203.3358 1775.5933,-207.9688 1785.931,-210.2889 1785.1214,-203.3358"/>
+<path fill="none" stroke="#191970" d="M1719.4659,-146.4829C1840.4993,-141.0692 2080.686,-127.4073 2282.5,-98 2285.4488,-97.5703 2288.4569,-97.0927 2291.49,-96.5793"/>
+<polygon fill="#191970" stroke="#191970" points="1719.027,-142.9988 1709.1907,-146.9354 1719.335,-149.992 1719.027,-142.9988"/>
 </g>
 <!-- Node34 -->
 <g id="node11" class="node">
 <title>Node34</title>
 <g id="a_node11"><a xlink:href="relay_2attrs_2transform_8h.html" target="_top" xlink:title="Transform operators. ">
-<polygon fill="#ffffff" stroke="#000000" points="1427,-134.5 1427,-164.5 1555,-164.5 1555,-134.5 1427,-134.5"/>
-<text text-anchor="start" x="1435" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/attrs</text>
-<text text-anchor="middle" x="1491" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/transform.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2145.5,-67.5 2145.5,-97.5 2273.5,-97.5 2273.5,-67.5 2145.5,-67.5"/>
+<text text-anchor="start" x="2153.5" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/attrs</text>
+<text text-anchor="middle" x="2209.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/transform.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node34 -->
 <g id="edge12" class="edge">
 <title>Node26&#45;&gt;Node34</title>
-<path fill="none" stroke="#191970" d="M1667.1682,-203.893C1630.9673,-192.7157 1578.0381,-176.3735 1539.6526,-164.5218"/>
-<polygon fill="#191970" stroke="#191970" points="1666.3217,-207.2946 1676.9092,-206.9005 1668.3868,-200.6061 1666.3217,-207.2946"/>
+<path fill="none" stroke="#191970" d="M1719.1216,-143.5481C1815.9738,-135.615 1986.5158,-119.9775 2131.5,-98 2136.0047,-97.3172 2140.6441,-96.5528 2145.3087,-95.7393"/>
+<polygon fill="#191970" stroke="#191970" points="1718.8037,-140.0623 1709.1203,-144.3613 1719.3711,-147.0393 1718.8037,-140.0623"/>
 </g>
 <!-- Node35 -->
 <g id="node12" class="node">
 <title>Node35</title>
 <g id="a_node12"><a xlink:href="relay_2transform_8h.html" target="_top" xlink:title="Relay specific transformation passes. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1410,-73 1410,-92 1572,-92 1572,-73 1410,-73"/>
-<text text-anchor="middle" x="1491" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/transform.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2071.5,-6 2071.5,-25 2233.5,-25 2233.5,-6 2071.5,-6"/>
+<text text-anchor="middle" x="2152.5" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/transform.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node35 -->
-<g id="edge34" class="edge">
+<g id="edge32" class="edge">
 <title>Node26&#45;&gt;Node35</title>
-<path fill="none" stroke="#191970" d="M1675.0796,-202.8819C1653.6017,-193.43 1625.328,-179.896 1602,-165 1583.6298,-153.2698 1581.36,-147.1792 1564,-134 1543.9874,-118.807 1520.0703,-102.2194 1505.2044,-92.0863"/>
-<polygon fill="#191970" stroke="#191970" points="1673.8599,-206.1677 1684.4274,-206.9276 1676.6403,-199.7435 1673.8599,-206.1677"/>
+<path fill="none" stroke="#191970" d="M1719.1454,-144.3544C1841.0972,-135.7368 2066.9612,-117.4115 2098.5,-98 2126.4904,-80.7724 2142.8702,-42.98 2149.3107,-25.1605"/>
+<polygon fill="#191970" stroke="#191970" points="1718.8439,-140.8669 1709.1133,-145.058 1719.3337,-147.8497 1718.8439,-140.8669"/>
 </g>
 <!-- Node37 -->
 <g id="node13" class="node">
 <title>Node37</title>
 <g id="a_node13"><a xlink:href="dataflow__pattern_8h.html" target="_top" xlink:title="A pattern language for matching dataflow properties. ">
-<polygon fill="#ffffff" stroke="#000000" points="1039.5,-134.5 1039.5,-164.5 1186.5,-164.5 1186.5,-134.5 1039.5,-134.5"/>
-<text text-anchor="start" x="1047.5" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/dataflow</text>
-<text text-anchor="middle" x="1113" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_pattern.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1942,-67.5 1942,-97.5 2089,-97.5 2089,-67.5 1942,-67.5"/>
+<text text-anchor="start" x="1950" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/dataflow</text>
+<text text-anchor="middle" x="2015.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_pattern.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node37 -->
 <g id="edge14" class="edge">
 <title>Node26&#45;&gt;Node37</title>
-<path fill="none" stroke="#191970" d="M1630.1502,-207.7337C1514.7044,-194.7339 1299.6259,-170.515 1186.6518,-157.7936"/>
-<polygon fill="#191970" stroke="#191970" points="1629.9964,-211.2384 1640.3252,-208.8795 1630.7797,-204.2824 1629.9964,-211.2384"/>
+<path fill="none" stroke="#191970" d="M1706.1566,-138.2379C1765.1478,-127.9196 1854.7205,-112.1448 1932.5,-98 1935.5287,-97.4492 1938.619,-96.8845 1941.7404,-96.3118"/>
+<polygon fill="#191970" stroke="#191970" points="1705.4816,-134.8027 1696.2337,-139.9725 1706.687,-141.6981 1705.4816,-134.8027"/>
 </g>
 <!-- Node40 -->
-<g id="node16" class="node">
+<g id="node15" class="node">
 <title>Node40</title>
-<g id="a_node16"><a xlink:href="relay_2feature_8h.html" target="_top" xlink:title="Detect features used in Expr/Module. ">
-<polygon fill="#ffffff" stroke="#000000" points="1755.5,-140 1755.5,-159 1904.5,-159 1904.5,-140 1755.5,-140"/>
-<text text-anchor="middle" x="1830" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/feature.h</text>
+<g id="a_node15"><a xlink:href="relay_2feature_8h.html" target="_top" xlink:title="Detect features used in Expr/Module. ">
+<polygon fill="#ffffff" stroke="#000000" points="1591,-73 1591,-92 1740,-92 1740,-73 1591,-73"/>
+<text text-anchor="middle" x="1665.5" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/feature.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node40 -->
-<g id="edge19" class="edge">
+<g id="edge17" class="edge">
 <title>Node26&#45;&gt;Node40</title>
-<path fill="none" stroke="#191970" d="M1734.2982,-202.0575C1757.9714,-189.0567 1792.0929,-170.3179 1812.5528,-159.0817"/>
-<polygon fill="#191970" stroke="#191970" points="1732.56,-199.019 1725.4796,-206.9005 1735.9296,-205.1546 1732.56,-199.019"/>
+<path fill="none" stroke="#191970" d="M1648.3551,-130.3628C1652.8393,-117.8444 1658.5059,-102.0252 1662.0678,-92.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1645.016,-129.306 1644.9386,-139.9005 1651.6059,-131.6666 1645.016,-129.306"/>
 </g>
 <!-- Node41 -->
-<g id="node17" class="node">
+<g id="node16" class="node">
 <title>Node41</title>
-<g id="a_node17"><a xlink:href="relay_2function_8h.html" target="_top" xlink:title="Relay Function. ">
-<polygon fill="#ffffff" stroke="#000000" points="1998,-140 1998,-159 2152,-159 2152,-140 1998,-140"/>
-<text text-anchor="middle" x="2075" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/function.h</text>
+<g id="a_node16"><a xlink:href="relay_2function_8h.html" target="_top" xlink:title="Relay Function. ">
+<polygon fill="#ffffff" stroke="#000000" points="1418.5,-73 1418.5,-92 1572.5,-92 1572.5,-73 1418.5,-73"/>
+<text text-anchor="middle" x="1495.5" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/function.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node41 -->
-<g id="edge20" class="edge">
+<g id="edge18" class="edge">
 <title>Node26&#45;&gt;Node41</title>
-<path fill="none" stroke="#191970" d="M1770.3416,-205.1188C1841.3577,-192.154 1956.1463,-171.1981 2022.6909,-159.0496"/>
-<polygon fill="#191970" stroke="#191970" points="1769.4012,-201.7326 1760.1924,-206.9717 1770.6584,-208.6188 1769.4012,-201.7326"/>
+<path fill="none" stroke="#191970" d="M1611.1498,-135.5722C1582.7852,-122.5555 1541.1893,-103.467 1516.3794,-92.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1610.0333,-138.9107 1620.5818,-139.9005 1612.9529,-132.5486 1610.0333,-138.9107"/>
 </g>
 <!-- Node42 -->
-<g id="node18" class="node">
+<g id="node17" class="node">
 <title>Node42</title>
-<g id="a_node18"><a xlink:href="interpreter_8h.html" target="_top" xlink:title="An interpreter for Relay. ">
-<polygon fill="#ffffff" stroke="#000000" points="2540.5,-140 2540.5,-159 2705.5,-159 2705.5,-140 2540.5,-140"/>
-<text text-anchor="middle" x="2623" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/interpreter.h</text>
+<g id="a_node17"><a xlink:href="interpreter_8h.html" target="_top" xlink:title="An interpreter for Relay. ">
+<polygon fill="#ffffff" stroke="#000000" points="2438,-73 2438,-92 2603,-92 2603,-73 2438,-73"/>
+<text text-anchor="middle" x="2520.5" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/interpreter.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node42 -->
-<g id="edge24" class="edge">
+<g id="edge22" class="edge">
 <title>Node26&#45;&gt;Node42</title>
-<path fill="none" stroke="#191970" d="M1785.8123,-211.5027C1833.4952,-208.4488 1895.7755,-204.4751 1951,-201 2208.764,-184.7796 2274.4158,-194.495 2531,-165 2544.3558,-163.4647 2558.7221,-161.2956 2572.0396,-159.0651"/>
-<polygon fill="#191970" stroke="#191970" points="1785.3161,-208.0272 1775.5604,-212.1595 1785.7637,-215.0129 1785.3161,-208.0272"/>
+<path fill="none" stroke="#191970" d="M1719.3959,-147.0257C1860.9711,-141.9458 2169.7709,-128.1815 2428.5,-98 2441.8532,-96.4423 2456.2186,-94.2654 2469.536,-92.0344"/>
+<polygon fill="#191970" stroke="#191970" points="1718.9545,-143.539 1709.0847,-147.3909 1719.2024,-150.5347 1718.9545,-143.539"/>
 </g>
 <!-- Node26&#45;&gt;Node43 -->
-<g id="edge25" class="edge">
+<g id="edge23" class="edge">
 <title>Node26&#45;&gt;Node43</title>
-<path fill="none" stroke="#191970" d="M1698.4787,-197.7374C1692.0968,-185.1614 1683.9588,-169.1246 1678.8623,-159.0817"/>
-<polygon fill="#191970" stroke="#191970" points="1695.4822,-199.5669 1703.1286,-206.9005 1701.7244,-196.3992 1695.4822,-199.5669"/>
+<path fill="none" stroke="#191970" d="M1573.3093,-138.4497C1562.6743,-136.8653 1551.7996,-135.3255 1541.5,-134 1385.9644,-113.9833 1345.5475,-121.5006 1190.5,-98 1179.5794,-96.3448 1167.8608,-94.1852 1156.9425,-92.0062"/>
+<polygon fill="#191970" stroke="#191970" points="1572.9947,-141.9418 1583.4057,-139.9769 1574.0417,-135.0205 1572.9947,-141.9418"/>
 </g>
 <!-- Node44 -->
-<g id="node20" class="node">
+<g id="node19" class="node">
 <title>Node44</title>
-<g id="a_node20"><a xlink:href="relay_2op__attr__types_8h.html" target="_top" xlink:title="The Expr and related elements in DataFlow construction. ">
-<polygon fill="#ffffff" stroke="#000000" points="861.5,-134.5 861.5,-164.5 978.5,-164.5 978.5,-134.5 861.5,-134.5"/>
-<text text-anchor="start" x="869.5" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/op</text>
-<text text-anchor="middle" x="920" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_attr_types.h</text>
+<g id="a_node19"><a xlink:href="relay_2op__attr__types_8h.html" target="_top" xlink:title="The Expr and related elements in DataFlow construction. ">
+<polygon fill="#ffffff" stroke="#000000" points="839,-67.5 839,-97.5 956,-97.5 956,-67.5 839,-67.5"/>
+<text text-anchor="start" x="847" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/op</text>
+<text text-anchor="middle" x="897.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_attr_types.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node44 -->
-<g id="edge29" class="edge">
+<g id="edge27" class="edge">
 <title>Node26&#45;&gt;Node44</title>
-<path fill="none" stroke="#191970" d="M1630.2379,-211.4451C1458.5628,-200.2032 1057.476,-173.4563 996,-165 990.397,-164.2293 984.5869,-163.2943 978.7883,-162.2696"/>
-<polygon fill="#191970" stroke="#191970" points="1630.2598,-214.954 1640.467,-212.1144 1630.7169,-207.9689 1630.2598,-214.954"/>
+<path fill="none" stroke="#191970" d="M1576.0926,-138.4895C1564.589,-136.8025 1552.7119,-135.2198 1541.5,-134 1288.71,-106.4974 1222.4305,-132.4992 970.5,-98 965.8789,-97.3672 961.1139,-96.6058 956.3368,-95.7646"/>
+<polygon fill="#191970" stroke="#191970" points="1575.6701,-141.9652 1586.0796,-139.9922 1576.7117,-135.0431 1575.6701,-141.9652"/>
 </g>
 <!-- Node45 -->
-<g id="node21" class="node">
+<g id="node20" class="node">
 <title>Node45</title>
-<g id="a_node21"><a xlink:href="op__strategy_8h.html" target="_top" xlink:title="The Relay operator Strategy and related data structure. ">
-<polygon fill="#ffffff" stroke="#000000" points="818.5,-67.5 818.5,-97.5 935.5,-97.5 935.5,-67.5 818.5,-67.5"/>
-<text text-anchor="start" x="826.5" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/op</text>
-<text text-anchor="middle" x="877" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_strategy.h</text>
+<g id="a_node20"><a xlink:href="op__strategy_8h.html" target="_top" xlink:title="The Relay operator Strategy and related data structure. ">
+<polygon fill="#ffffff" stroke="#000000" points="795,-.5 795,-30.5 912,-30.5 912,-.5 795,-.5"/>
+<text text-anchor="start" x="803" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/op</text>
+<text text-anchor="middle" x="853.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_strategy.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node45 -->
-<g id="edge32" class="edge">
+<g id="edge30" class="edge">
 <title>Node26&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M1630.4664,-211.7398C1458.4522,-201.025 1057.8075,-175.1576 1030,-165 1007.8704,-156.9165 1007.3126,-145.9357 987,-134 963.3656,-120.1124 935.3688,-107.0489 913.5349,-97.5538"/>
-<polygon fill="#191970" stroke="#191970" points="1630.2712,-215.2344 1640.4692,-212.3619 1630.7057,-208.2478 1630.2712,-215.2344"/>
+<path fill="none" stroke="#191970" d="M1575.3241,-138.4485C1564.0592,-136.7951 1552.4604,-135.2341 1541.5,-134 1315.6962,-108.5754 1251.5401,-154.7109 1031.5,-98 1031.0207,-97.8765 937.2729,-54.3813 886.0404,-30.6041"/>
+<polygon fill="#191970" stroke="#191970" points="1575.1383,-141.9598 1585.5476,-139.9861 1576.1794,-135.0376 1575.1383,-141.9598"/>
 </g>
 <!-- Node28&#45;&gt;Node27 -->
 <g id="edge5" class="edge">
 <title>Node28&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M2264.0847,-135.5809C2287.3114,-124.4653 2319.7989,-108.9177 2343.6112,-97.5218"/>
-<polygon fill="#191970" stroke="#191970" points="2262.5679,-132.4266 2255.0586,-139.9005 2265.5898,-138.7407 2262.5679,-132.4266"/>
+<path fill="none" stroke="#191970" d="M1358.372,-71.0758C1426.1323,-58.3589 1534.6538,-37.9922 1599.8071,-25.7645"/>
+<polygon fill="#191970" stroke="#191970" points="1357.4531,-67.6871 1348.2703,-72.9717 1358.7443,-74.567 1357.4531,-67.6871"/>
 </g>
 <!-- Node28&#45;&gt;Node29 -->
 <g id="edge6" class="edge">
 <title>Node28&#45;&gt;Node29</title>
-<path fill="none" stroke="#191970" d="M2232.0875,-129.9863C2230.2282,-117.5286 2227.8994,-101.9258 2226.4301,-92.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2228.6293,-130.5268 2233.5672,-139.9005 2235.5526,-129.4934 2228.6293,-130.5268"/>
+<path fill="none" stroke="#191970" d="M1322.0665,-67.5368C1343.3707,-54.5605 1373.5663,-36.1687 1391.7689,-25.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1319.9801,-64.7094 1313.2603,-72.9005 1323.6215,-70.6877 1319.9801,-64.7094"/>
 </g>
 <!-- Node28&#45;&gt;Node30 -->
 <g id="edge7" class="edge">
 <title>Node28&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M2201.3753,-136.1695C2173.2504,-125.0194 2133.1466,-109.1203 2103.8907,-97.5218"/>
-<polygon fill="#191970" stroke="#191970" points="2200.2004,-139.4687 2210.7864,-139.9005 2202.7802,-132.9614 2200.2004,-139.4687"/>
+<path fill="none" stroke="#191970" d="M1284.4991,-64.7233C1276.7017,-54.0615 1266.8944,-40.6515 1259.4861,-30.5218"/>
+<polygon fill="#191970" stroke="#191970" points="1281.7512,-66.8949 1290.4795,-72.9005 1287.4014,-62.7627 1281.7512,-66.8949"/>
 </g>
 <!-- Node28&#45;&gt;Node31 -->
 <g id="edge8" class="edge">
 <title>Node28&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M2179.7422,-137.9199C2173.4217,-136.5971 2167.0709,-135.2687 2161,-134 2101.2926,-121.5223 2033.4588,-107.3813 1984.0053,-97.0784"/>
-<polygon fill="#191970" stroke="#191970" points="2179.1,-141.3613 2189.605,-139.9847 2180.5344,-134.5098 2179.1,-141.3613"/>
+<path fill="none" stroke="#191970" d="M1258.8157,-69.6052C1225.3101,-58.4367 1176.8166,-42.2722 1141.5653,-30.5218"/>
+<polygon fill="#191970" stroke="#191970" points="1258.1079,-73.0586 1268.7016,-72.9005 1260.3216,-66.4178 1258.1079,-73.0586"/>
 </g>
 <!-- Node34&#45;&gt;Node35 -->
 <g id="edge13" class="edge">
 <title>Node34&#45;&gt;Node35</title>
-<path fill="none" stroke="#191970" d="M1491,-124.1199C1491,-112.9072 1491,-100.4031 1491,-92.055"/>
-<polygon fill="#191970" stroke="#191970" points="1487.5001,-124.2966 1491,-134.2967 1494.5001,-124.2967 1487.5001,-124.2966"/>
+<path fill="none" stroke="#191970" d="M2189.9542,-59.5252C2179.9226,-47.7336 2168.2435,-34.0055 2160.6289,-25.055"/>
+<polygon fill="#191970" stroke="#191970" points="2187.4202,-61.948 2196.5658,-67.2967 2192.7518,-57.4121 2187.4202,-61.948"/>
 </g>
 <!-- Node38 -->
 <g id="node14" class="node">
 <title>Node38</title>
 <g id="a_node14"><a xlink:href="dataflow__matcher_8h.html" target="_top" xlink:title="A pattern matcher for matching dataflow properties. ">
-<polygon fill="#ffffff" stroke="#000000" points="1069.5,-.5 1069.5,-30.5 1216.5,-30.5 1216.5,-.5 1069.5,-.5"/>
-<text text-anchor="start" x="1077.5" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/dataflow</text>
-<text text-anchor="middle" x="1143" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_matcher.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1906,-.5 1906,-30.5 2053,-30.5 2053,-.5 1906,-.5"/>
+<text text-anchor="start" x="1914" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/dataflow</text>
+<text text-anchor="middle" x="1979.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_matcher.h</text>
 </a>
 </g>
 </g>
 <!-- Node37&#45;&gt;Node38 -->
 <g id="edge15" class="edge">
 <title>Node37&#45;&gt;Node38</title>
-<path fill="none" stroke="#191970" d="M1148.8654,-128.4127C1159.2575,-120.3306 1169.3237,-110.0941 1175,-98 1185.7699,-75.0534 1169.3632,-47.4142 1156.3383,-30.6639"/>
-<polygon fill="#191970" stroke="#191970" points="1146.596,-125.7343 1140.5389,-134.4269 1150.6948,-131.4089 1146.596,-125.7343"/>
-</g>
-<!-- Node39 -->
-<g id="node15" class="node">
-<title>Node39</title>
-<g id="a_node15"><a xlink:href="dataflow__pattern__functor_8h.html" target="_top" xlink:title="A set of passes for operating on pattern graphs. ">
-<polygon fill="#ffffff" stroke="#000000" points="1018.5,-67.5 1018.5,-97.5 1165.5,-97.5 1165.5,-67.5 1018.5,-67.5"/>
-<text text-anchor="start" x="1026.5" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/dataflow</text>
-<text text-anchor="middle" x="1092" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_pattern_functor.h</text>
-</a>
-</g>
-</g>
-<!-- Node37&#45;&gt;Node39 -->
-<g id="edge16" class="edge">
-<title>Node37&#45;&gt;Node39</title>
-<path fill="none" stroke="#191970" d="M1105.1988,-124.6103C1102.3606,-115.5553 1099.2403,-105.5998 1096.7464,-97.6432"/>
-<polygon fill="#191970" stroke="#191970" points="1101.9041,-125.8012 1108.2348,-134.2967 1108.5836,-123.7076 1101.9041,-125.8012"/>
-</g>
-<!-- Node39&#45;&gt;Node38 -->
-<g id="edge17" class="edge">
-<title>Node39&#45;&gt;Node38</title>
-<path fill="none" stroke="#191970" d="M1109.8404,-59.0626C1117.0193,-49.6315 1125.0909,-39.0276 1131.4731,-30.6432"/>
-<polygon fill="#191970" stroke="#191970" points="1106.8446,-57.2197 1103.5727,-67.2967 1112.4146,-61.4595 1106.8446,-57.2197"/>
+<path fill="none" stroke="#191970" d="M2002.5957,-58.4837C1997.6073,-49.1996 1992.0498,-38.8565 1987.6366,-30.6432"/>
+<polygon fill="#191970" stroke="#191970" points="1999.5147,-60.1443 2007.331,-67.2967 2005.681,-56.8311 1999.5147,-60.1443"/>
 </g>
 <!-- Node41&#45;&gt;Node29 -->
-<g id="edge21" class="edge">
+<g id="edge19" class="edge">
 <title>Node41&#45;&gt;Node29</title>
-<path fill="none" stroke="#191970" d="M2105.8007,-135.7424C2134.9499,-122.7224 2177.9476,-103.5167 2203.5485,-92.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2104.1945,-132.6265 2096.4913,-139.9005 2107.0494,-139.0179 2104.1945,-132.6265"/>
+<path fill="none" stroke="#191970" d="M1474.6899,-66.6559C1457.725,-53.7395 1434.3204,-35.9201 1420.0849,-25.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1472.8151,-69.6275 1482.8917,-72.9005 1477.0555,-64.058 1472.8151,-69.6275"/>
 </g>
 <!-- Node41&#45;&gt;Node30 -->
-<g id="edge22" class="edge">
+<g id="edge20" class="edge">
 <title>Node41&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M2072.3505,-129.7758C2070.9653,-119.4641 2069.2969,-107.0437 2068.0178,-97.5218"/>
-<polygon fill="#191970" stroke="#191970" points="2068.9103,-130.4555 2073.7105,-139.9005 2075.848,-129.5236 2068.9103,-130.4555"/>
+<path fill="none" stroke="#191970" d="M1450.6113,-70.3237C1409.416,-59.1493 1348.2498,-42.5576 1304.0026,-30.5553"/>
+<polygon fill="#191970" stroke="#191970" points="1449.8057,-73.7316 1460.3732,-72.9717 1451.6383,-66.9757 1449.8057,-73.7316"/>
 </g>
 <!-- Node41&#45;&gt;Node35 -->
-<g id="edge23" class="edge">
+<g id="edge21" class="edge">
 <title>Node41&#45;&gt;Node35</title>
-<path fill="none" stroke="#191970" d="M1987.8057,-139.4965C1875.4588,-126.6074 1681.6378,-104.3711 1572.254,-91.8219"/>
-<polygon fill="#191970" stroke="#191970" points="1987.4169,-142.9748 1997.7507,-140.6375 1988.2148,-136.0204 1987.4169,-142.9748"/>
+<path fill="none" stroke="#191970" d="M1554.24,-71.1999C1563.6663,-69.6277 1573.334,-68.1572 1582.5,-67 1794.7477,-40.2038 1850.176,-57.1847 2062.5,-31 2075.4034,-29.4087 2089.2745,-27.2465 2102.1696,-25.0425"/>
+<polygon fill="#191970" stroke="#191970" points="1553.4039,-67.7923 1544.1415,-72.936 1554.5899,-74.6911 1553.4039,-67.7923"/>
 </g>
 <!-- Node43&#45;&gt;Node30 -->
-<g id="edge26" class="edge">
+<g id="edge24" class="edge">
 <title>Node43&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M1726.6595,-137.9007C1733.4899,-136.5192 1740.4023,-135.182 1747,-134 1855.7663,-114.5149 1884.2337,-117.4851 1993,-98 1996.0607,-97.4517 1999.1892,-96.8699 2002.3458,-96.2659"/>
-<polygon fill="#191970" stroke="#191970" points="1725.8437,-134.4952 1716.757,-139.9435 1727.258,-141.3509 1725.8437,-134.4952"/>
+<path fill="none" stroke="#191970" d="M1141.8451,-68.4324C1164.2214,-57.3271 1195.3724,-41.867 1218.2323,-30.5218"/>
+<polygon fill="#191970" stroke="#191970" points="1140.2438,-65.3198 1132.8422,-72.9005 1143.3557,-71.59 1140.2438,-65.3198"/>
 </g>
 <!-- Node43&#45;&gt;Node31 -->
-<g id="edge27" class="edge">
+<g id="edge25" class="edge">
 <title>Node43&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1717.8707,-137.2528C1757.8956,-126.0792 1817.168,-109.5323 1860.0704,-97.5553"/>
-<polygon fill="#191970" stroke="#191970" points="1716.8219,-133.9117 1708.1313,-139.9717 1718.7041,-140.6539 1716.8219,-133.9117"/>
+<path fill="none" stroke="#191970" d="M1108.5787,-63.1042C1105.9461,-52.7287 1102.7532,-40.145 1100.3115,-30.5218"/>
+<polygon fill="#191970" stroke="#191970" points="1105.2124,-64.0685 1111.0643,-72.9005 1111.9974,-62.3469 1105.2124,-64.0685"/>
 </g>
 <!-- Node43&#45;&gt;Node35 -->
-<g id="edge28" class="edge">
+<g id="edge26" class="edge">
 <title>Node43&#45;&gt;Node35</title>
-<path fill="none" stroke="#191970" d="M1638.263,-136.416C1602.6922,-123.3928 1548.9471,-103.7156 1517.1708,-92.0817"/>
-<polygon fill="#191970" stroke="#191970" points="1637.1868,-139.7491 1647.7806,-139.9005 1639.5935,-133.1758 1637.1868,-139.7491"/>
+<path fill="none" stroke="#191970" d="M1183.3665,-71.6163C1196.6659,-69.8394 1210.4914,-68.1928 1223.5,-67 1595.1729,-32.9212 1691.3912,-70.7541 2062.5,-31 2076.0578,-29.5477 2090.6633,-27.3219 2104.0825,-25.0062"/>
+<polygon fill="#191970" stroke="#191970" points="1182.7523,-68.1677 1173.3211,-72.9952 1183.7042,-75.1027 1182.7523,-68.1677"/>
 </g>
 <!-- Node44&#45;&gt;Node35 -->
-<g id="edge31" class="edge">
+<g id="edge29" class="edge">
 <title>Node44&#45;&gt;Node35</title>
-<path fill="none" stroke="#191970" d="M989.0076,-139.4668C1002.587,-137.5798 1016.7348,-135.6752 1030,-134 1162.8304,-117.2255 1317.5585,-100.5187 1409.7232,-90.8661"/>
-<polygon fill="#191970" stroke="#191970" points="988.176,-136.049 978.7581,-140.9022 989.1469,-142.9813 988.176,-136.049"/>
+<path fill="none" stroke="#191970" d="M966.2611,-74.0795C990.0577,-71.4364 1016.921,-68.7539 1041.5,-67 1494.4082,-34.6822 1610.8419,-77.639 2062.5,-31 2076.0632,-29.5994 2090.6705,-27.3906 2104.0897,-25.0747"/>
+<polygon fill="#191970" stroke="#191970" points="965.8104,-70.6081 956.2658,-75.2071 966.5952,-77.564 965.8104,-70.6081"/>
 </g>
 <!-- Node44&#45;&gt;Node45 -->
-<g id="edge30" class="edge">
+<g id="edge28" class="edge">
 <title>Node44&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M904.7725,-125.7735C898.7666,-116.4154 892.0449,-105.9421 886.7188,-97.6432"/>
-<polygon fill="#191970" stroke="#191970" points="901.8958,-127.7712 910.2426,-134.2967 907.7869,-123.9903 901.8958,-127.7712"/>
+<path fill="none" stroke="#191970" d="M881.9184,-58.7735C875.7728,-49.4154 868.8948,-38.9421 863.4448,-30.6432"/>
+<polygon fill="#191970" stroke="#191970" points="879.1008,-60.8592 887.5157,-67.2967 884.9519,-57.0167 879.1008,-60.8592"/>
 </g>
 <!-- Node47 -->
-<g id="node23" class="node">
+<g id="node22" class="node">
 <title>Node47</title>
-<g id="a_node23"><a xlink:href="greedy_8h.html" target="_top" xlink:title="This header file contains helper methods used in greedy algorithms for planning memory for USMP...">
-<polygon fill="#ffffff" stroke="#000000" points="661,-134.5 661,-164.5 779,-164.5 779,-134.5 661,-134.5"/>
-<text text-anchor="start" x="669" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/usmp</text>
-<text text-anchor="middle" x="720" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/algo/greedy.h</text>
+<g id="a_node22"><a xlink:href="greedy_8h.html" target="_top" xlink:title="This header file contains helper methods used in greedy algorithms for planning memory for USMP...">
+<polygon fill="#ffffff" stroke="#000000" points="211.5,-67.5 211.5,-97.5 329.5,-97.5 329.5,-67.5 211.5,-67.5"/>
+<text text-anchor="start" x="219.5" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/usmp</text>
+<text text-anchor="middle" x="270.5" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/algo/greedy.h</text>
 </a>
 </g>
 </g>
 <!-- Node46&#45;&gt;Node47 -->
-<g id="edge37" class="edge">
+<g id="edge35" class="edge">
 <title>Node46&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M517.0705,-204.3237C558.4325,-193.1493 619.8463,-176.5576 664.2727,-164.5553"/>
-<polygon fill="#191970" stroke="#191970" points="516.01,-200.9846 507.269,-206.9717 517.8357,-207.7424 516.01,-200.9846"/>
+<path fill="none" stroke="#191970" d="M157.2079,-135.5809C180.9324,-124.4653 214.116,-108.9177 238.4386,-97.5218"/>
+<polygon fill="#191970" stroke="#191970" points="155.5588,-132.4884 147.9884,-139.9005 158.5287,-138.8271 155.5588,-132.4884"/>
 </g>
 <!-- Node48 -->
-<g id="node24" class="node">
+<g id="node23" class="node">
 <title>Node48</title>
-<g id="a_node24"><a xlink:href="detail_2extern_8h.html" target="_top" xlink:title="Helpers for using external functions. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="345.5,-67.5 345.5,-97.5 472.5,-97.5 472.5,-67.5 345.5,-67.5"/>
-<text text-anchor="start" x="353.5" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/topi/detail</text>
-<text text-anchor="middle" x="409" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/extern.h</text>
+<g id="a_node23"><a xlink:href="detail_2extern_8h.html" target="_top" xlink:title="Helpers for using external functions. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="160,-.5 160,-30.5 287,-30.5 287,-.5 160,-.5"/>
+<text text-anchor="start" x="168" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/topi/detail</text>
+<text text-anchor="middle" x="223.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/extern.h</text>
 </a>
 </g>
 </g>
 <!-- Node46&#45;&gt;Node48 -->
-<g id="edge38" class="edge">
+<g id="edge36" class="edge">
 <title>Node46&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M463.1919,-197.7654C450.596,-170.9741 427.684,-122.2405 416.1044,-97.611"/>
-<polygon fill="#191970" stroke="#191970" points="460.047,-199.3025 467.4692,-206.8631 466.3818,-196.3242 460.047,-199.3025"/>
+<path fill="none" stroke="#191970" d="M140.4182,-131.4684C159.499,-104.8347 194.8738,-55.4575 212.6742,-30.611"/>
+<polygon fill="#191970" stroke="#191970" points="137.3828,-129.6955 134.4041,-139.8631 143.0732,-133.7723 137.3828,-129.6955"/>
 </g>
 <!-- Node53 -->
-<g id="node25" class="node">
+<g id="node24" class="node">
 <title>Node53</title>
-<g id="a_node25"><a xlink:href="elemwise_8h.html" target="_top" xlink:title="Elementwise op constructions. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="461.5,-140 461.5,-159 616.5,-159 616.5,-140 461.5,-140"/>
-<text text-anchor="middle" x="539" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/topi/elemwise.h</text>
+<g id="a_node24"><a xlink:href="elemwise_8h.html" target="_top" xlink:title="Elementwise op constructions. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="0,-73 0,-92 155,-92 155,-73 0,-73"/>
+<text text-anchor="middle" x="77.5" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/topi/elemwise.h</text>
 </a>
 </g>
 </g>
 <!-- Node46&#45;&gt;Node53 -->
-<g id="edge39" class="edge">
+<g id="edge37" class="edge">
 <title>Node46&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M488.9215,-199.5785C501.7361,-186.7639 518.8782,-169.6218 529.4183,-159.0817"/>
-<polygon fill="#191970" stroke="#191970" points="486.1957,-197.3546 481.5995,-206.9005 491.1454,-202.3043 486.1957,-197.3546"/>
+<path fill="none" stroke="#191970" d="M114.3273,-131.8486C104.8279,-119.1194 92.3679,-102.4229 84.6505,-92.0817"/>
+<polygon fill="#191970" stroke="#191970" points="111.5503,-133.9795 120.3362,-139.9005 117.1604,-129.7929 111.5503,-133.9795"/>
 </g>
 <!-- Node60 -->
-<g id="node27" class="node">
+<g id="node26" class="node">
 <title>Node60</title>
-<g id="a_node27"><a xlink:href="int__solver_8h.html" target="_top" xlink:title="integer constraints data structures and solvers ">
-<polygon fill="#ffffff" stroke="#000000" points="49,-201.5 49,-231.5 165,-231.5 165,-201.5 49,-201.5"/>
-<text text-anchor="start" x="57" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/arith/int</text>
-<text text-anchor="middle" x="107" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_solver.h</text>
+<g id="a_node26"><a xlink:href="int__solver_8h.html" target="_top" xlink:title="integer constraints data structures and solvers ">
+<polygon fill="#ffffff" stroke="#000000" points="545.5,-134.5 545.5,-164.5 661.5,-164.5 661.5,-134.5 545.5,-134.5"/>
+<text text-anchor="start" x="553.5" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/arith/int</text>
+<text text-anchor="middle" x="603.5" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_solver.h</text>
 </a>
 </g>
 </g>
 <!-- Node59&#45;&gt;Node60 -->
-<g id="edge41" class="edge">
+<g id="edge39" class="edge">
 <title>Node59&#45;&gt;Node60</title>
-<path fill="none" stroke="#191970" d="M204.4121,-269.1339C183.9011,-258.0514 155.6112,-242.7657 134.8015,-231.5218"/>
-<polygon fill="#191970" stroke="#191970" points="202.7721,-272.2261 213.2338,-273.9005 206.0998,-266.0675 202.7721,-272.2261"/>
+<path fill="none" stroke="#191970" d="M654.4242,-199.6753C643.4566,-188.8691 629.3353,-174.9553 618.746,-164.5218"/>
+<polygon fill="#191970" stroke="#191970" points="652.1775,-202.3752 661.7573,-206.9005 657.0905,-197.3889 652.1775,-202.3752"/>
 </g>
 <!-- Node61 -->
-<g id="node28" class="node">
+<g id="node27" class="node">
 <title>Node61</title>
-<g id="a_node28"><a xlink:href="tensor_8h.html" target="_top" xlink:title="Dataflow tensor object. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="259.5,-207 259.5,-226 388.5,-226 388.5,-207 259.5,-207"/>
-<text text-anchor="middle" x="324" y="-214" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/te/tensor.h</text>
+<g id="a_node27"><a xlink:href="tir_2ir_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/tir/ir.h">
+<polygon fill="#ffffff" stroke="#000000" points="423.5,-134.5 423.5,-164.5 527.5,-164.5 527.5,-134.5 423.5,-134.5"/>
+<text text-anchor="start" x="431.5" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
+<text text-anchor="middle" x="475.5" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/tir/ir.h</text>
 </a>
 </g>
 </g>
 <!-- Node59&#45;&gt;Node61 -->
-<g id="edge42" class="edge">
+<g id="edge40" class="edge">
 <title>Node59&#45;&gt;Node61</title>
-<path fill="none" stroke="#191970" d="M252.5008,-268.0102C270.4667,-255.067 295.5176,-237.0195 310.7001,-226.0817"/>
-<polygon fill="#191970" stroke="#191970" points="250.3925,-265.2153 244.3246,-273.9005 254.4842,-270.8949 250.3925,-265.2153"/>
+<path fill="none" stroke="#191970" d="M633.778,-203.6052C601.1058,-192.4367 553.8187,-176.2722 519.4443,-164.5218"/>
+<polygon fill="#191970" stroke="#191970" points="632.8234,-206.9777 643.418,-206.9005 635.0877,-200.354 632.8234,-206.9777"/>
 </g>
-<!-- Node63 -->
+<!-- Node62 -->
+<g id="node28" class="node">
+<title>Node62</title>
+<g id="a_node28"><a xlink:href="tensor_8h.html" target="_top" xlink:title="Dataflow tensor object. ">
+<polygon fill="#ffffff" stroke="#ff0000" points="718,-140 718,-159 847,-159 847,-140 718,-140"/>
+<text text-anchor="middle" x="782.5" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/te/tensor.h</text>
+</a>
+</g>
+</g>
+<!-- Node59&#45;&gt;Node62 -->
+<g id="edge41" class="edge">
+<title>Node59&#45;&gt;Node62</title>
+<path fill="none" stroke="#191970" d="M696.0011,-201.711C717.5142,-188.7256 748.1755,-170.2184 766.6259,-159.0817"/>
+<polygon fill="#191970" stroke="#191970" points="694.1562,-198.7364 687.4036,-206.9005 697.7736,-204.7293 694.1562,-198.7364"/>
+</g>
+<!-- Node64 -->
 <g id="node29" class="node">
-<title>Node63</title>
+<title>Node64</title>
 <g id="a_node29"><a xlink:href="operation_8h.html" target="_top" xlink:title="Operation node can generate one or multiple Tensors. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="221,-140 221,-159 367,-159 367,-140 221,-140"/>
-<text text-anchor="middle" x="294" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/te/operation.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="385.5,-73 385.5,-92 531.5,-92 531.5,-73 385.5,-73"/>
+<text text-anchor="middle" x="458.5" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/te/operation.h</text>
 </a>
 </g>
 </g>
-<!-- Node59&#45;&gt;Node63 -->
-<g id="edge49" class="edge">
-<title>Node59&#45;&gt;Node63</title>
-<path fill="none" stroke="#191970" d="M232.857,-263.7911C235.0981,-246.4881 239.9099,-221.0275 250,-201 258.36,-184.4064 273.1611,-168.6781 283.2681,-159.0679"/>
-<polygon fill="#191970" stroke="#191970" points="229.366,-263.5159 231.7259,-273.8445 236.3221,-264.2986 229.366,-263.5159"/>
+<!-- Node59&#45;&gt;Node64 -->
+<g id="edge48" class="edge">
+<title>Node59&#45;&gt;Node64</title>
+<path fill="none" stroke="#191970" d="M605.1178,-205.3403C531.0185,-192.566 420.6035,-172.4479 414.5,-165 395.2596,-141.5216 427.606,-108.3229 446.4404,-92.1324"/>
+<polygon fill="#191970" stroke="#191970" points="604.8698,-208.8488 615.3179,-207.092 606.0546,-201.9498 604.8698,-208.8488"/>
 </g>
-<!-- Node89 -->
+<!-- Node90 -->
 <g id="node30" class="node">
-<title>Node89</title>
+<title>Node90</title>
 <g id="a_node30"><a xlink:href="pad__utils_8h.html" target="_top" xlink:title="Padding helpers. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="153.5,-67.5 153.5,-97.5 280.5,-97.5 280.5,-67.5 153.5,-67.5"/>
-<text text-anchor="start" x="161.5" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/topi/detail</text>
-<text text-anchor="middle" x="217" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/pad_utils.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="344,-.5 344,-30.5 471,-30.5 471,-.5 344,-.5"/>
+<text text-anchor="start" x="352" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/topi/detail</text>
+<text text-anchor="middle" x="407.5" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/pad_utils.h</text>
 </a>
 </g>
 </g>
-<!-- Node59&#45;&gt;Node89 -->
-<g id="edge52" class="edge">
-<title>Node59&#45;&gt;Node89</title>
-<path fill="none" stroke="#191970" d="M226.8302,-263.6718C222.1317,-240.232 214.7868,-199.9843 212,-165 210.9059,-151.2657 211.3335,-147.7616 212,-134 212.591,-121.7974 213.9652,-108.0254 215.1236,-97.7954"/>
-<polygon fill="#191970" stroke="#191970" points="223.4555,-264.6384 228.8925,-273.7317 230.3129,-263.2325 223.4555,-264.6384"/>
+<!-- Node59&#45;&gt;Node90 -->
+<g id="edge51" class="edge">
+<title>Node59&#45;&gt;Node90</title>
+<path fill="none" stroke="#191970" d="M605.3367,-206.9739C531.7058,-195.8623 420.976,-177.4325 405.5,-165 370.0887,-136.5527 364.344,-110.7657 376.5,-67 380.1866,-53.7272 388.3133,-40.4497 395.3211,-30.7012"/>
+<polygon fill="#191970" stroke="#191970" points="605.058,-210.4712 615.4663,-208.4919 606.0954,-203.5485 605.058,-210.4712"/>
 </g>
 <!-- Node57 -->
 <g id="node31" class="node">
 <title>Node57</title>
 <g id="a_node31"><a xlink:href="topi_2nn_8h.html" target="_top" xlink:title="NN op constructions. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="0,-73 0,-92 120,-92 120,-73 0,-73"/>
-<text text-anchor="middle" x="60" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/topi/nn.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="551.5,-6 551.5,-25 671.5,-25 671.5,-6 551.5,-6"/>
+<text text-anchor="middle" x="611.5" y="-13" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/topi/nn.h</text>
 </a>
 </g>
 </g>
 <!-- Node59&#45;&gt;Node57 -->
-<g id="edge53" class="edge">
+<g id="edge52" class="edge">
 <title>Node59&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M164.8376,-274.3194C115.952,-266.0223 55.3387,-252.0223 40,-232 7.0358,-188.9702 40.4574,-117.781 54.4592,-92.1053"/>
-<polygon fill="#191970" stroke="#191970" points="164.3197,-277.7812 174.7556,-275.9532 165.4575,-270.8743 164.3197,-277.7812"/>
+<path fill="none" stroke="#191970" d="M673.7357,-196.3529C675.025,-179.4548 675.5286,-154.8155 670.5,-134 659.9764,-90.4385 631.2521,-44.4847 618.1607,-25.0582"/>
+<polygon fill="#191970" stroke="#191970" points="670.2171,-196.3915 672.7759,-206.6727 677.187,-197.0398 670.2171,-196.3915"/>
 </g>
-<!-- Node110 -->
+<!-- Node111 -->
 <g id="node32" class="node">
-<title>Node110</title>
+<title>Node111</title>
 <g id="a_node32"><a xlink:href="data__layout_8h.html" target="_top" xlink:title="Layout expression to describe the data organization of a tensor. And BijectiveLayout to mapping two d...">
-<polygon fill="#ffffff" stroke="#ff0000" points="555.5,-201.5 555.5,-231.5 668.5,-231.5 668.5,-201.5 555.5,-201.5"/>
-<text text-anchor="start" x="563.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/data</text>
-<text text-anchor="middle" x="612" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_layout.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="904,-134.5 904,-164.5 1017,-164.5 1017,-134.5 904,-134.5"/>
+<text text-anchor="start" x="912" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/data</text>
+<text text-anchor="middle" x="960.5" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_layout.h</text>
 </a>
 </g>
 </g>
-<!-- Node59&#45;&gt;Node110 -->
-<g id="edge50" class="edge">
-<title>Node59&#45;&gt;Node110</title>
-<path fill="none" stroke="#191970" d="M297.2017,-274.0268C361.1081,-264.564 460.4916,-249.0099 546,-232 549.0014,-231.4029 552.0739,-230.7673 555.1722,-230.107"/>
-<polygon fill="#191970" stroke="#191970" points="296.64,-270.5717 287.2573,-275.4925 297.6608,-277.4969 296.64,-270.5717"/>
+<!-- Node59&#45;&gt;Node111 -->
+<g id="edge49" class="edge">
+<title>Node59&#45;&gt;Node111</title>
+<path fill="none" stroke="#191970" d="M722.3863,-204.7028C773.2048,-192.9214 851.1444,-174.8523 903.89,-162.6241"/>
+<polygon fill="#191970" stroke="#191970" points="721.5509,-201.3036 712.5997,-206.9717 723.1319,-208.1227 721.5509,-201.3036"/>
 </g>
-<!-- Node61&#45;&gt;Node44 -->
+<!-- Node62&#45;&gt;Node44 -->
+<g id="edge42" class="edge">
+<title>Node62&#45;&gt;Node44</title>
+<path fill="none" stroke="#191970" d="M807.674,-134.8334C826.6526,-123.7763 852.5909,-108.6644 871.7164,-97.5218"/>
+<polygon fill="#191970" stroke="#191970" points="805.8553,-131.8423 798.9767,-139.9005 809.3792,-137.8906 805.8553,-131.8423"/>
+</g>
+<!-- Node62&#45;&gt;Node45 -->
 <g id="edge43" class="edge">
-<title>Node61&#45;&gt;Node44</title>
-<path fill="none" stroke="#191970" d="M374.7655,-204.9219C382.1975,-203.4598 389.7786,-202.0952 397,-201 569.5397,-174.8319 614.4365,-183.1775 788,-165 811.9813,-162.4884 838.3792,-159.4445 861.2249,-156.7214"/>
-<polygon fill="#191970" stroke="#191970" points="373.8701,-201.5324 364.7711,-206.96 375.2688,-208.3912 373.8701,-201.5324"/>
+<title>Node62&#45;&gt;Node45</title>
+<path fill="none" stroke="#191970" d="M782.2758,-129.7802C782.8767,-112.2808 785.7754,-86.535 796.5,-67 804.5227,-52.3865 818.3006,-39.7654 830.3006,-30.6752"/>
+<polygon fill="#191970" stroke="#191970" points="778.7742,-129.8993 782.1536,-139.9407 785.7737,-129.9836 778.7742,-129.8993"/>
 </g>
-<!-- Node61&#45;&gt;Node45 -->
+<!-- Node62&#45;&gt;Node64 -->
 <g id="edge44" class="edge">
-<title>Node61&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M342.8999,-200.3329C366.6518,-181.0072 409.4786,-149.3557 452,-134 518.0609,-110.1435 718.2289,-93.442 818.4794,-86.334"/>
-<polygon fill="#191970" stroke="#191970" points="340.5129,-197.7654 335.0481,-206.8422 344.9805,-203.1544 340.5129,-197.7654"/>
+<title>Node62&#45;&gt;Node64</title>
+<path fill="none" stroke="#191970" d="M726.3021,-137.8788C663.5229,-124.8967 563.0669,-104.1234 504.6803,-92.0496"/>
+<polygon fill="#191970" stroke="#191970" points="725.9212,-141.374 736.4228,-139.9717 727.3388,-134.5191 725.9212,-141.374"/>
 </g>
-<!-- Node61&#45;&gt;Node63 -->
+<!-- Node64&#45;&gt;Node48 -->
 <g id="edge45" class="edge">
-<title>Node61&#45;&gt;Node63</title>
-<path fill="none" stroke="#191970" d="M315.5988,-197.7374C309.9678,-185.1614 302.7871,-169.1246 298.2903,-159.0817"/>
-<polygon fill="#191970" stroke="#191970" points="312.4206,-199.204 319.7017,-206.9005 318.8094,-196.3433 312.4206,-199.204"/>
+<title>Node64&#45;&gt;Node48</title>
+<path fill="none" stroke="#191970" d="M415.033,-70.1073C375.813,-58.9254 318.0243,-42.4495 276.1883,-30.5218"/>
+<polygon fill="#191970" stroke="#191970" points="414.2538,-73.5245 424.8302,-72.9005 416.1731,-66.7928 414.2538,-73.5245"/>
 </g>
-<!-- Node63&#45;&gt;Node48 -->
+<!-- Node64&#45;&gt;Node90 -->
 <g id="edge46" class="edge">
-<title>Node63&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M319.174,-134.8334C338.1526,-123.7763 364.0909,-108.6644 383.2164,-97.5218"/>
-<polygon fill="#191970" stroke="#191970" points="317.3553,-131.8423 310.4767,-139.9005 320.8792,-137.8906 317.3553,-131.8423"/>
+<title>Node64&#45;&gt;Node90</title>
+<path fill="none" stroke="#191970" d="M444.9685,-64.7233C436.8528,-54.0615 426.6451,-40.6515 418.9345,-30.5218"/>
+<polygon fill="#191970" stroke="#191970" points="442.3511,-67.0634 451.1929,-72.9005 447.921,-62.8236 442.3511,-67.0634"/>
 </g>
-<!-- Node63&#45;&gt;Node89 -->
+<!-- Node64&#45;&gt;Node57 -->
 <g id="edge47" class="edge">
-<title>Node63&#45;&gt;Node89</title>
-<path fill="none" stroke="#191970" d="M275.3835,-133.3012C262.8675,-122.4107 246.4875,-108.1579 234.2638,-97.5218"/>
-<polygon fill="#191970" stroke="#191970" points="273.1263,-135.9767 282.9678,-139.9005 277.7213,-130.6959 273.1263,-135.9767"/>
-</g>
-<!-- Node63&#45;&gt;Node57 -->
-<g id="edge48" class="edge">
-<title>Node63&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M250.6155,-137.0779C205.1772,-124.0678 134.7908,-103.9145 93.4643,-92.0817"/>
-<polygon fill="#191970" stroke="#191970" points="249.8964,-140.5126 260.4735,-139.9005 251.8233,-133.783 249.8964,-140.5126"/>
+<title>Node64&#45;&gt;Node57</title>
+<path fill="none" stroke="#191970" d="M489.9167,-68.7424C519.6489,-55.7224 563.5066,-36.5167 589.6195,-25.0817"/>
+<polygon fill="#191970" stroke="#191970" points="488.1774,-65.6831 480.4212,-72.9005 490.9854,-72.0952 488.1774,-65.6831"/>
 </g>
-<!-- Node110&#45;&gt;Node44 -->
-<g id="edge51" class="edge">
-<title>Node110&#45;&gt;Node44</title>
-<path fill="none" stroke="#191970" d="M678.6219,-200.8862C747.1326,-185.6121 814.4899,-171.3897 861.2437,-161.6441"/>
-<polygon fill="#191970" stroke="#191970" points="677.5034,-197.5498 668.5078,-203.147 679.0305,-204.3812 677.5034,-197.5498"/>
+<!-- Node111&#45;&gt;Node44 -->
+<g id="edge50" class="edge">
+<title>Node111&#45;&gt;Node44</title>
+<path fill="none" stroke="#191970" d="M939.273,-126.9253C930.2062,-117.2828 919.8643,-106.2843 911.7391,-97.6432"/>
+<polygon fill="#191970" stroke="#191970" points="936.8042,-129.409 946.2043,-134.2967 941.9039,-124.6138 936.8042,-129.409"/>
 </g>
-<!-- Node115 -->
+<!-- Node116 -->
 <g id="node37" class="node">
-<title>Node115</title>
+<title>Node116</title>
 <g id="a_node37"><a xlink:href="target_8h.html" target="_top" xlink:title="Compilation target object. ">
-<polygon fill="#ffffff" stroke="#000000" points="1335.5,-335.5 1335.5,-365.5 1442.5,-365.5 1442.5,-335.5 1335.5,-335.5"/>
-<text text-anchor="start" x="1343.5" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="1389" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1704,-268.5 1704,-298.5 1811,-298.5 1811,-268.5 1704,-268.5"/>
+<text text-anchor="start" x="1712" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="1757.5" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target.h</text>
 </a>
 </g>
 </g>
-<!-- Node114&#45;&gt;Node115 -->
-<g id="edge58" class="edge">
-<title>Node114&#45;&gt;Node115</title>
-<path fill="none" stroke="#191970" d="M1389,-392.0249C1389,-383.128 1389,-373.4287 1389,-365.6432"/>
-<polygon fill="#191970" stroke="#191970" points="1385.5001,-392.2966 1389,-402.2967 1392.5001,-392.2967 1385.5001,-392.2966"/>
+<!-- Node115&#45;&gt;Node116 -->
+<g id="edge57" class="edge">
+<title>Node115&#45;&gt;Node116</title>
+<path fill="none" stroke="#191970" d="M1757.5,-325.0249C1757.5,-316.128 1757.5,-306.4287 1757.5,-298.6432"/>
+<polygon fill="#191970" stroke="#191970" points="1754.0001,-325.2966 1757.5,-335.2967 1761.0001,-325.2967 1754.0001,-325.2966"/>
 </g>
-<!-- Node115&#45;&gt;Node35 -->
-<g id="edge73" class="edge">
-<title>Node115&#45;&gt;Node35</title>
-<path fill="none" stroke="#191970" d="M1359.2026,-329.4369C1306.0019,-289.3048 1204.9928,-200.0028 1258,-134 1276.9692,-110.3802 1350.3091,-96.9397 1409.7059,-89.7568"/>
-<polygon fill="#191970" stroke="#191970" points="1357.1831,-332.2967 1367.2996,-335.4446 1361.3542,-326.6751 1357.1831,-332.2967"/>
+<!-- Node116&#45;&gt;Node35 -->
+<g id="edge72" class="edge">
+<title>Node116&#45;&gt;Node35</title>
+<path fill="none" stroke="#191970" d="M1821.2748,-277.0155C1850.5753,-274.1151 1885.7885,-270.74 1917.5,-268 2022.7446,-258.9066 2291.7149,-266.6705 2391.5,-232 2414.788,-223.9085 2415.5115,-212.1495 2437.5,-201 2523.8683,-157.2062 2585.5855,-184.307 2629.5,-98 2635.7481,-85.7204 2638.7046,-77.2519 2629.5,-67 2603.5895,-38.1414 2362.8984,-24.0147 2233.6641,-18.4591"/>
+<polygon fill="#191970" stroke="#191970" points="1820.7775,-273.5476 1811.1728,-278.0198 1821.4701,-280.5133 1820.7775,-273.5476"/>
+</g>
+<!-- Node116&#45;&gt;Node42 -->
+<g id="edge69" class="edge">
+<title>Node116&#45;&gt;Node42</title>
+<path fill="none" stroke="#191970" d="M1821.0792,-268.6849C1822.5647,-268.4431 1824.0399,-268.2142 1825.5,-268 1943.3961,-250.7059 2251.1865,-279.4249 2360.5,-232 2379.2458,-223.8673 2479.3585,-123.947 2510.945,-92.1499"/>
+<polygon fill="#191970" stroke="#191970" points="1820.4072,-265.2494 1811.1749,-270.447 1821.6334,-272.1411 1820.4072,-265.2494"/>
 </g>
-<!-- Node115&#45;&gt;Node42 -->
+<!-- Node116&#45;&gt;Node44 -->
 <g id="edge70" class="edge">
-<title>Node115&#45;&gt;Node42</title>
-<path fill="none" stroke="#191970" d="M1452.737,-344.1842C1481.7895,-341.3317 1516.6297,-337.9478 1548,-335 1636.416,-326.6918 1863.7705,-332.5369 1946,-299 1966.1819,-290.7689 1964.2232,-277.1619 1984,-268 2010.65,-255.654 2416.7962,-184.9777 2567.3806,-159.0416"/>
-<polygon fill="#191970" stroke="#191970" points="1452.3268,-340.7076 1442.7173,-345.1694 1453.0118,-347.674 1452.3268,-340.7076"/>
+<title>Node116&#45;&gt;Node44</title>
+<path fill="none" stroke="#191970" d="M1693.3655,-282.8344C1557.3581,-280.6591 1244.4395,-271.3938 1146.5,-232 1082.2955,-206.1753 1085.1518,-169.0827 1025.5,-134 1000.0006,-119.0032 969.3146,-106.4903 944.3807,-97.555"/>
+<polygon fill="#191970" stroke="#191970" points="1693.5762,-286.3379 1703.6282,-282.9904 1693.6826,-279.3387 1693.5762,-286.3379"/>
 </g>
-<!-- Node115&#45;&gt;Node44 -->
+<!-- Node116&#45;&gt;Node45 -->
 <g id="edge71" class="edge">
-<title>Node115&#45;&gt;Node44</title>
-<path fill="none" stroke="#191970" d="M1345.9423,-331.1403C1326.8821,-322.0745 1304.4289,-310.7048 1285,-299 1264.8195,-286.8424 1262.8027,-279.0594 1242,-268 1152.5847,-220.4638 1039.9289,-183.8056 974.3291,-164.5405"/>
-<polygon fill="#191970" stroke="#191970" points="1344.4594,-334.3106 1354.9984,-335.397 1347.4372,-327.9755 1344.4594,-334.3106"/>
+<title>Node116&#45;&gt;Node45</title>
+<path fill="none" stroke="#191970" d="M1693.5821,-280.5186C1550.6301,-273.5273 1208.7597,-254.9228 1095.5,-232 1003.2057,-213.3204 974.2247,-215.1111 894.5,-165 876.9371,-153.9608 837.293,-117.2247 829.5,-98 820.2537,-75.1903 832.8966,-47.5115 843.0532,-30.7159"/>
+<polygon fill="#191970" stroke="#191970" points="1693.7757,-284.032 1703.9338,-281.0215 1694.1154,-277.0403 1693.7757,-284.032"/>
 </g>
-<!-- Node115&#45;&gt;Node45 -->
-<g id="edge72" class="edge">
-<title>Node115&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M1325.1402,-348.3649C1214.8848,-343.8673 993.9164,-331.0544 924,-299 884.2652,-280.7829 869.451,-272.0771 852,-232 834.6113,-192.0661 842.2415,-176.4483 852,-134 854.9838,-121.0206 861.6936,-107.5187 867.4113,-97.5975"/>
-<polygon fill="#191970" stroke="#191970" points="1325.3246,-351.8749 1335.4557,-348.7746 1325.6025,-344.8804 1325.3246,-351.8749"/>
-</g>
-<!-- Node115&#45;&gt;Node113 -->
-<g id="edge77" class="edge">
-<title>Node115&#45;&gt;Node113</title>
-<path fill="none" stroke="#191970" d="M1452.5703,-335.6202C1454.0585,-335.3979 1455.5366,-335.1904 1457,-335 1914.3105,-275.4883 2037.2159,-362.4259 2494,-299 2497.0123,-298.5817 2500.09,-298.0793 2503.1857,-297.515"/>
-<polygon fill="#191970" stroke="#191970" points="1451.9358,-332.1777 1442.6523,-337.2832 1453.0934,-339.0813 1451.9358,-332.1777"/>
+<!-- Node116&#45;&gt;Node114 -->
+<g id="edge76" class="edge">
+<title>Node116&#45;&gt;Node114</title>
+<path fill="none" stroke="#191970" d="M1821.0716,-268.6305C1822.5594,-268.405 1824.0371,-268.1942 1825.5,-268 2223.6591,-215.1441 2330.8081,-288.2638 2728.5,-232 2731.5112,-231.574 2734.588,-231.0654 2737.6831,-230.4965"/>
+<polygon fill="#191970" stroke="#191970" points="1820.4312,-265.189 1811.1557,-270.3091 1821.5996,-272.0908 1820.4312,-265.189"/>
 </g>
-<!-- Node100 -->
+<!-- Node101 -->
 <g id="node38" class="node">
-<title>Node100</title>
+<title>Node101</title>
 <g id="a_node38"><a xlink:href="search__task_8h.html" target="_top" xlink:title="Meta information and hardware parameters for a search task. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1661,-268.5 1661,-298.5 1813,-298.5 1813,-268.5 1661,-268.5"/>
-<text text-anchor="start" x="1669" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/auto_scheduler</text>
-<text text-anchor="middle" x="1737" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/search_task.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1401.5,-201.5 1401.5,-231.5 1553.5,-231.5 1553.5,-201.5 1401.5,-201.5"/>
+<text text-anchor="start" x="1409.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/auto_scheduler</text>
+<text text-anchor="middle" x="1477.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/search_task.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node100 -->
-<g id="edge59" class="edge">
-<title>Node115&#45;&gt;Node100</title>
-<path fill="none" stroke="#191970" d="M1452.6191,-335.9116C1454.0927,-335.6018 1455.5546,-335.2975 1457,-335 1467.0303,-332.9354 1581.4497,-311.9552 1660.8778,-297.4191"/>
-<polygon fill="#191970" stroke="#191970" points="1451.8203,-332.5033 1442.7762,-338.0218 1453.2877,-339.3478 1451.8203,-332.5033"/>
+<!-- Node116&#45;&gt;Node101 -->
+<g id="edge58" class="edge">
+<title>Node116&#45;&gt;Node101</title>
+<path fill="none" stroke="#191970" d="M1693.9883,-268.3026C1647.9893,-257.2957 1586.0944,-242.4851 1540.268,-231.5195"/>
+<polygon fill="#191970" stroke="#191970" points="1693.3045,-271.7377 1703.8445,-270.661 1694.9336,-264.9299 1693.3045,-271.7377"/>
 </g>
-<!-- Node108 -->
+<!-- Node109 -->
 <g id="node39" class="node">
-<title>Node108</title>
+<title>Node109</title>
 <g id="a_node39"><a xlink:href="driver__api_8h.html" target="_top" xlink:title="Compiler driver APIs to drive the compilation. ">
-<polygon fill="#ffffff" stroke="#000000" points="1831,-268.5 1831,-298.5 1937,-298.5 1937,-268.5 1831,-268.5"/>
-<text text-anchor="start" x="1839" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/driver</text>
-<text text-anchor="middle" x="1884" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/driver_api.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1571.5,-201.5 1571.5,-231.5 1677.5,-231.5 1677.5,-201.5 1571.5,-201.5"/>
+<text text-anchor="start" x="1579.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/driver</text>
+<text text-anchor="middle" x="1624.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/driver_api.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node108 -->
-<g id="edge60" class="edge">
-<title>Node115&#45;&gt;Node108</title>
-<path fill="none" stroke="#191970" d="M1452.5888,-335.7466C1454.0714,-335.4863 1455.5434,-335.2369 1457,-335 1617.8957,-308.8346 1661.3861,-326.8426 1822,-299 1824.8424,-298.5073 1827.7469,-297.9548 1830.6716,-297.3597"/>
-<polygon fill="#191970" stroke="#191970" points="1451.8815,-332.3182 1442.6991,-337.6035 1453.1733,-339.1979 1451.8815,-332.3182"/>
+<!-- Node116&#45;&gt;Node109 -->
+<g id="edge59" class="edge">
+<title>Node116&#45;&gt;Node109</title>
+<path fill="none" stroke="#191970" d="M1718.6187,-263.9132C1698.2357,-253.645 1673.6428,-241.2561 1654.7083,-231.7177"/>
+<polygon fill="#191970" stroke="#191970" points="1717.1467,-267.0906 1727.6522,-268.4639 1720.2961,-260.8391 1717.1467,-267.0906"/>
 </g>
-<!-- Node116 -->
+<!-- Node117 -->
 <g id="node40" class="node">
-<title>Node116</title>
+<title>Node117</title>
 <g id="a_node40"><a xlink:href="memory__pools_8h.html" target="_top" xlink:title="The object definition for relay.build argument type of memory pools. ">
-<polygon fill="#ffffff" stroke="#000000" points="623,-268.5 623,-298.5 751,-298.5 751,-268.5 623,-268.5"/>
-<text text-anchor="start" x="631" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/memory</text>
-<text text-anchor="middle" x="687" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_pools.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="321.5,-201.5 321.5,-231.5 449.5,-231.5 449.5,-201.5 321.5,-201.5"/>
+<text text-anchor="start" x="329.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/memory</text>
+<text text-anchor="middle" x="385.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_pools.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node116 -->
-<g id="edge61" class="edge">
-<title>Node115&#45;&gt;Node116</title>
-<path fill="none" stroke="#191970" d="M1325.1292,-347.6323C1211.8629,-342.0121 968.5615,-327.5314 765,-299 760.488,-298.3676 755.8431,-297.6407 751.1746,-296.8541"/>
-<polygon fill="#191970" stroke="#191970" points="1325.1926,-351.1395 1335.3519,-348.1329 1325.5351,-344.1479 1325.1926,-351.1395"/>
+<!-- Node116&#45;&gt;Node117 -->
+<g id="edge60" class="edge">
+<title>Node116&#45;&gt;Node117</title>
+<path fill="none" stroke="#191970" d="M1693.7211,-282.162C1512.4306,-278.0673 978.6556,-263.9891 536.5,-232 507.9815,-229.9367 476.5127,-226.8382 449.7173,-223.9525"/>
+<polygon fill="#191970" stroke="#191970" points="1693.8015,-285.6646 1703.8773,-282.3893 1693.9582,-278.6664 1693.8015,-285.6646"/>
 </g>
-<!-- Node117 -->
+<!-- Node118 -->
 <g id="node41" class="node">
-<title>Node117</title>
+<title>Node118</title>
 <g id="a_node41"><a xlink:href="tir_2usmp_2utils_8h.html" target="_top" xlink:title="Utilities for Unified Static Memory Planner. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="687,-201.5 687,-231.5 805,-231.5 805,-201.5 687,-201.5"/>
-<text text-anchor="start" x="695" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/usmp</text>
-<text text-anchor="middle" x="746" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/utils.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="211.5,-134.5 211.5,-164.5 329.5,-164.5 329.5,-134.5 211.5,-134.5"/>
+<text text-anchor="start" x="219.5" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/usmp</text>
+<text text-anchor="middle" x="270.5" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/utils.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node117 -->
-<g id="edge83" class="edge">
-<title>Node115&#45;&gt;Node117</title>
-<path fill="none" stroke="#191970" d="M1325.038,-347.9387C1182.3991,-341.7911 847.298,-324.7725 803,-299 776.8172,-283.7669 759.8941,-251.1284 751.739,-231.7822"/>
-<polygon fill="#191970" stroke="#191970" points="1325.2374,-351.4503 1335.3775,-348.3797 1325.5358,-344.4566 1325.2374,-351.4503"/>
+<!-- Node116&#45;&gt;Node118 -->
+<g id="edge82" class="edge">
+<title>Node116&#45;&gt;Node118</title>
+<path fill="none" stroke="#191970" d="M1693.6764,-282.2921C1484.2038,-278.0692 818.9718,-262.5366 606.5,-232 552.6188,-224.2562 406.6058,-186.1225 325.9304,-164.5133"/>
+<polygon fill="#191970" stroke="#191970" points="1693.8232,-285.7957 1703.8912,-282.4964 1693.9632,-278.7971 1693.8232,-285.7957"/>
 </g>
-<!-- Node121 -->
+<!-- Node122 -->
 <g id="node42" class="node">
-<title>Node121</title>
+<title>Node122</title>
 <g id="a_node42"><a xlink:href="builder_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/builder.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="1365,-268.5 1365,-298.5 1517,-298.5 1517,-268.5 1365,-268.5"/>
-<text text-anchor="start" x="1373" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1441" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/builder.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1155.5,-201.5 1155.5,-231.5 1307.5,-231.5 1307.5,-201.5 1155.5,-201.5"/>
+<text text-anchor="start" x="1163.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1231.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/builder.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node121 -->
-<g id="edge64" class="edge">
-<title>Node115&#45;&gt;Node121</title>
-<path fill="none" stroke="#191970" d="M1406.9665,-327.3509C1414.3417,-317.8482 1422.6734,-307.1132 1429.2471,-298.6432"/>
-<polygon fill="#191970" stroke="#191970" points="1404.1659,-325.2508 1400.7996,-335.2967 1409.6958,-329.5427 1404.1659,-325.2508"/>
+<!-- Node116&#45;&gt;Node122 -->
+<g id="edge63" class="edge">
+<title>Node116&#45;&gt;Node122</title>
+<path fill="none" stroke="#191970" d="M1693.6606,-277.0299C1608.5829,-268.1627 1453.4219,-251.1375 1321.5,-232 1316.9497,-231.3399 1312.2753,-230.6304 1307.5635,-229.8913"/>
+<polygon fill="#191970" stroke="#191970" points="1693.3742,-280.5189 1703.6822,-278.0704 1694.0972,-273.5563 1693.3742,-280.5189"/>
 </g>
-<!-- Node124 -->
+<!-- Node125 -->
 <g id="node43" class="node">
-<title>Node124</title>
+<title>Node125</title>
 <g id="a_node43"><a xlink:href="tune__context_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/tune_context.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="1294,-201.5 1294,-231.5 1446,-231.5 1446,-201.5 1294,-201.5"/>
-<text text-anchor="start" x="1302" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1370" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/tune_context.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1207.5,-134.5 1207.5,-164.5 1359.5,-164.5 1359.5,-134.5 1207.5,-134.5"/>
+<text text-anchor="start" x="1215.5" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1283.5" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/tune_context.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node124 -->
-<g id="edge69" class="edge">
-<title>Node115&#45;&gt;Node124</title>
-<path fill="none" stroke="#191970" d="M1370.1428,-327.2678C1364.4433,-318.8686 1358.9243,-308.9839 1356,-299 1349.262,-275.9958 1357.0256,-248.3733 1363.3964,-231.6421"/>
-<polygon fill="#191970" stroke="#191970" points="1367.3481,-329.376 1376.0465,-335.4249 1373.0188,-325.2719 1367.3481,-329.376"/>
+<!-- Node116&#45;&gt;Node125 -->
+<g id="edge68" class="edge">
+<title>Node116&#45;&gt;Node125</title>
+<path fill="none" stroke="#191970" d="M1693.8852,-278.4011C1604.2097,-270.5568 1445.6293,-254.0898 1392.5,-232 1354.3693,-216.1462 1317.7172,-183.7307 1298.1492,-164.5912"/>
+<polygon fill="#191970" stroke="#191970" points="1693.6722,-281.8957 1703.9364,-279.2695 1694.2748,-274.9216 1693.6722,-281.8957"/>
 </g>
-<!-- Node125 -->
+<!-- Node126 -->
 <g id="node44" class="node">
-<title>Node125</title>
+<title>Node126</title>
 <g id="a_node44"><a xlink:href="database_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/database.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="1993,-268.5 1993,-298.5 2145,-298.5 2145,-268.5 1993,-268.5"/>
-<text text-anchor="start" x="2001" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2069" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/database.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1733.5,-201.5 1733.5,-231.5 1885.5,-231.5 1885.5,-201.5 1733.5,-201.5"/>
+<text text-anchor="start" x="1741.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1809.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/database.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node125 -->
-<g id="edge66" class="edge">
-<title>Node115&#45;&gt;Node125</title>
-<path fill="none" stroke="#191970" d="M1452.5797,-335.6882C1454.0651,-335.4454 1455.5401,-335.2154 1457,-335 1687.0605,-301.0566 1748.113,-326.7706 1979,-299 1983.5651,-298.4509 1988.2508,-297.8273 1992.9713,-297.1532"/>
-<polygon fill="#191970" stroke="#191970" points="1451.9058,-332.253 1442.6762,-337.4554 1453.1355,-339.1442 1451.9058,-332.253"/>
+<!-- Node116&#45;&gt;Node126 -->
+<g id="edge65" class="edge">
+<title>Node116&#45;&gt;Node126</title>
+<path fill="none" stroke="#191970" d="M1775.4665,-260.3509C1782.8417,-250.8482 1791.1734,-240.1132 1797.7471,-231.6432"/>
+<polygon fill="#191970" stroke="#191970" points="1772.6659,-258.2508 1769.2996,-268.2967 1778.1958,-262.5427 1772.6659,-258.2508"/>
 </g>
-<!-- Node127 -->
+<!-- Node128 -->
 <g id="node45" class="node">
-<title>Node127</title>
+<title>Node128</title>
 <g id="a_node45"><a xlink:href="extracted__task_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/extracted_task.h">
-<polygon fill="#ffffff" stroke="#000000" points="2163,-268.5 2163,-298.5 2315,-298.5 2315,-268.5 2163,-268.5"/>
-<text text-anchor="start" x="2171" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2239" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/extracted_task.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1903.5,-201.5 1903.5,-231.5 2055.5,-231.5 2055.5,-201.5 1903.5,-201.5"/>
+<text text-anchor="start" x="1911.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1979.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/extracted_task.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node127 -->
-<g id="edge67" class="edge">
-<title>Node115&#45;&gt;Node127</title>
-<path fill="none" stroke="#191970" d="M1452.5748,-335.6539C1454.0616,-335.4214 1455.5383,-335.2028 1457,-335 1764.2478,-292.373 1845.8935,-334.8977 2154,-299 2156.8309,-298.6702 2159.7098,-298.3039 2162.614,-297.9085"/>
-<polygon fill="#191970" stroke="#191970" points="1451.9207,-332.215 1442.6637,-337.3684 1453.114,-339.1125 1451.9207,-332.215"/>
+<!-- Node116&#45;&gt;Node128 -->
+<g id="edge66" class="edge">
+<title>Node116&#45;&gt;Node128</title>
+<path fill="none" stroke="#191970" d="M1816.9814,-265.5484C1852.1209,-254.9432 1896.1125,-241.6665 1929.4616,-231.6017"/>
+<polygon fill="#191970" stroke="#191970" points="1815.8834,-262.2238 1807.3211,-268.4639 1817.9059,-268.9252 1815.8834,-262.2238"/>
 </g>
-<!-- Node128 -->
+<!-- Node129 -->
 <g id="node46" class="node">
-<title>Node128</title>
+<title>Node129</title>
 <g id="a_node46"><a xlink:href="profiler_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/profiler.h">
-<polygon fill="#ffffff" stroke="#000000" points="2333,-268.5 2333,-298.5 2485,-298.5 2485,-268.5 2333,-268.5"/>
-<text text-anchor="start" x="2341" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2409" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/profiler.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2073.5,-201.5 2073.5,-231.5 2225.5,-231.5 2225.5,-201.5 2073.5,-201.5"/>
+<text text-anchor="start" x="2081.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2149.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/profiler.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node128 -->
-<g id="edge68" class="edge">
-<title>Node115&#45;&gt;Node128</title>
-<path fill="none" stroke="#191970" d="M1452.572,-335.6338C1454.0597,-335.4073 1455.5373,-335.1954 1457,-335 1839.2693,-283.932 1940.7053,-341.696 2324,-299 2326.8325,-298.6845 2329.7128,-298.3304 2332.6182,-297.9453"/>
-<polygon fill="#191970" stroke="#191970" points="1451.9297,-332.1927 1442.6568,-337.3175 1453.1016,-339.0939 1451.9297,-332.1927"/>
+<!-- Node116&#45;&gt;Node129 -->
+<g id="edge67" class="edge">
+<title>Node116&#45;&gt;Node129</title>
+<path fill="none" stroke="#191970" d="M1821.1066,-268.8478C1822.5839,-268.5572 1824.05,-268.2741 1825.5,-268 1931.0516,-248.0495 1958.5374,-249.6374 2064.5,-232 2067.3114,-231.532 2070.1737,-231.0482 2073.0638,-230.5533"/>
+<polygon fill="#191970" stroke="#191970" points="1820.3427,-265.4315 1811.2443,-270.8601 1821.7421,-272.2902 1820.3427,-265.4315"/>
 </g>
-<!-- Node129 -->
+<!-- Node130 -->
 <g id="node47" class="node">
-<title>Node129</title>
+<title>Node130</title>
 <g id="a_node47"><a xlink:href="codegen_8h.html" target="_top" xlink:title="Translates IRModule to runtime::Module. ">
-<polygon fill="#ffffff" stroke="#000000" points="959.5,-268.5 959.5,-298.5 1066.5,-298.5 1066.5,-268.5 959.5,-268.5"/>
-<text text-anchor="start" x="967.5" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="1013" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/codegen.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2446,-201.5 2446,-231.5 2553,-231.5 2553,-201.5 2446,-201.5"/>
+<text text-anchor="start" x="2454" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="2499.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/codegen.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node129 -->
-<g id="edge74" class="edge">
-<title>Node115&#45;&gt;Node129</title>
-<path fill="none" stroke="#191970" d="M1325.0729,-341.4771C1261.626,-332.1709 1161.7277,-316.5706 1076,-299 1072.9104,-298.3668 1069.743,-297.6866 1066.5517,-296.9774"/>
-<polygon fill="#191970" stroke="#191970" points="1324.921,-344.9919 1335.3215,-342.9721 1325.9315,-338.0652 1324.921,-344.9919"/>
+<!-- Node116&#45;&gt;Node130 -->
+<g id="edge73" class="edge">
+<title>Node116&#45;&gt;Node130</title>
+<path fill="none" stroke="#191970" d="M1821.0769,-268.6693C1822.5632,-268.4322 1824.0391,-268.2084 1825.5,-268 2092.603,-229.8895 2164.397,-270.1105 2431.5,-232 2436.1749,-231.333 2441.0036,-230.5095 2445.8308,-229.5926"/>
+<polygon fill="#191970" stroke="#191970" points="1820.414,-265.232 1811.1692,-270.4074 1821.6236,-272.1267 1820.414,-265.232"/>
 </g>
-<!-- Node130 -->
+<!-- Node131 -->
 <g id="node48" class="node">
-<title>Node130</title>
+<title>Node131</title>
 <g id="a_node48"><a xlink:href="generic__func_8h.html" target="_top" xlink:title="Generic function that can be specialzied on a per target basis. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="861.5,-201.5 861.5,-231.5 968.5,-231.5 968.5,-201.5 861.5,-201.5"/>
-<text text-anchor="start" x="869.5" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="915" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/generic_func.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1425,-134.5 1425,-164.5 1532,-164.5 1532,-134.5 1425,-134.5"/>
+<text text-anchor="start" x="1433" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="1478.5" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/generic_func.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node130 -->
-<g id="edge75" class="edge">
-<title>Node115&#45;&gt;Node130</title>
-<path fill="none" stroke="#191970" d="M1325.4211,-346.5612C1210.9832,-338.9607 979.4307,-321.0579 950,-299 928.4617,-282.8573 920.0499,-250.8563 916.8595,-231.8026"/>
-<polygon fill="#191970" stroke="#191970" points="1325.2457,-350.0571 1335.4539,-347.221 1325.7052,-343.0722 1325.2457,-350.0571"/>
+<!-- Node116&#45;&gt;Node131 -->
+<g id="edge74" class="edge">
+<title>Node116&#45;&gt;Node131</title>
+<path fill="none" stroke="#191970" d="M1742.9237,-259.5916C1730.2789,-240.8729 1710.2913,-215.634 1686.5,-201 1660.9127,-185.2613 1584.8288,-168.8843 1532.1441,-158.938"/>
+<polygon fill="#191970" stroke="#191970" points="1740.1316,-261.7176 1748.5257,-268.182 1745.995,-257.8938 1740.1316,-261.7176"/>
 </g>
-<!-- Node131 -->
+<!-- Node132 -->
 <g id="node49" class="node">
-<title>Node131</title>
+<title>Node132</title>
 <g id="a_node49"><a xlink:href="virtual__device_8h.html" target="_top" xlink:title="A compile time representation for where data is to be stored at runtime, and how to compile code to c...">
-<polygon fill="#ffffff" stroke="#ff0000" points="1535.5,-268.5 1535.5,-298.5 1642.5,-298.5 1642.5,-268.5 1535.5,-268.5"/>
-<text text-anchor="start" x="1543.5" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="1589" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/virtual_device.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2244,-201.5 2244,-231.5 2351,-231.5 2351,-201.5 2244,-201.5"/>
+<text text-anchor="start" x="2252" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="2297.5" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/virtual_device.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node131 -->
-<g id="edge78" class="edge">
-<title>Node115&#45;&gt;Node131</title>
-<path fill="none" stroke="#191970" d="M1443.4003,-332.2759C1474.9208,-321.7165 1514.1329,-308.5805 1543.9203,-298.6017"/>
-<polygon fill="#191970" stroke="#191970" points="1442.2542,-328.9686 1433.8839,-335.4639 1444.4778,-335.6061 1442.2542,-328.9686"/>
+<!-- Node116&#45;&gt;Node132 -->
+<g id="edge77" class="edge">
+<title>Node116&#45;&gt;Node132</title>
+<path fill="none" stroke="#191970" d="M1821.0854,-268.7258C1822.5691,-268.4717 1824.0422,-268.2292 1825.5,-268 2005.766,-239.6569 2054.4833,-261.8859 2234.5,-232 2237.6112,-231.4835 2240.7956,-230.8945 2243.9998,-230.2552"/>
+<polygon fill="#191970" stroke="#191970" points="1820.39,-265.2949 1811.1907,-270.5506 1821.6597,-272.1788 1820.39,-265.2949"/>
 </g>
-<!-- Node135 -->
+<!-- Node136 -->
 <g id="node50" class="node">
-<title>Node135</title>
+<title>Node136</title>
 <g id="a_node50"><a xlink:href="tir_2transform_8h.html" target="_top" xlink:title="TIR specific transformation passes. ">
-<polygon fill="#ffffff" stroke="#000000" points="1085,-274 1085,-293 1233,-293 1233,-274 1085,-274"/>
-<text text-anchor="middle" x="1159" y="-281" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/transform.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2571.5,-207 2571.5,-226 2719.5,-226 2719.5,-207 2571.5,-207"/>
+<text text-anchor="middle" x="2645.5" y="-214" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/transform.h</text>
 </a>
 </g>
 </g>
-<!-- Node115&#45;&gt;Node135 -->
-<g id="edge82" class="edge">
-<title>Node115&#45;&gt;Node135</title>
-<path fill="none" stroke="#191970" d="M1327.709,-332.6457C1283.9925,-319.9109 1226.9217,-303.2859 1191.6301,-293.0053"/>
-<polygon fill="#191970" stroke="#191970" points="1326.8037,-336.0274 1337.3835,-335.4639 1328.7615,-329.3067 1326.8037,-336.0274"/>
-</g>
-<!-- Node116&#45;&gt;Node117 -->
-<g id="edge62" class="edge">
-<title>Node116&#45;&gt;Node117</title>
-<path fill="none" stroke="#191970" d="M707.1318,-260.6385C715.5619,-251.0653 725.131,-240.1987 732.665,-231.6432"/>
-<polygon fill="#191970" stroke="#191970" points="704.3701,-258.4786 700.388,-268.2967 709.6236,-263.1048 704.3701,-258.4786"/>
+<!-- Node116&#45;&gt;Node136 -->
+<g id="edge81" class="edge">
+<title>Node116&#45;&gt;Node136</title>
+<path fill="none" stroke="#191970" d="M1821.074,-268.6483C1822.5611,-268.4175 1824.038,-268.2008 1825.5,-268 2150.3974,-223.3872 2236.785,-270.1891 2562.5,-232 2574.8752,-230.549 2588.1829,-228.3528 2600.4394,-226.0662"/>
+<polygon fill="#191970" stroke="#191970" points="1820.4232,-265.2088 1811.1618,-270.3543 1821.6106,-272.1074 1820.4232,-265.2088"/>
 </g>
-<!-- Node117&#45;&gt;Node47 -->
-<g id="edge63" class="edge">
-<title>Node117&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M736.4546,-191.9021C732.9106,-182.7696 728.9973,-172.6854 725.8765,-164.6432"/>
-<polygon fill="#191970" stroke="#191970" points="733.2195,-193.2402 740.1002,-201.2967 739.7453,-190.7078 733.2195,-193.2402"/>
+<!-- Node117&#45;&gt;Node118 -->
+<g id="edge61" class="edge">
+<title>Node117&#45;&gt;Node118</title>
+<path fill="none" stroke="#191970" d="M350.9247,-196.3561C333.4917,-186.1995 312.6969,-174.0843 296.62,-164.7177"/>
+<polygon fill="#191970" stroke="#191970" points="349.2893,-199.454 359.6918,-201.4639 352.8132,-193.4056 349.2893,-199.454"/>
 </g>
-<!-- Node121&#45;&gt;Node124 -->
-<g id="edge65" class="edge">
-<title>Node121&#45;&gt;Node124</title>
-<path fill="none" stroke="#191970" d="M1417.3806,-261.2113C1407.0905,-251.5009 1395.2949,-240.3698 1386.0472,-231.6432"/>
-<polygon fill="#191970" stroke="#191970" points="1415.2139,-263.979 1424.889,-268.2967 1420.0181,-258.8879 1415.2139,-263.979"/>
+<!-- Node118&#45;&gt;Node47 -->
+<g id="edge62" class="edge">
+<title>Node118&#45;&gt;Node47</title>
+<path fill="none" stroke="#191970" d="M270.5,-124.0249C270.5,-115.128 270.5,-105.4287 270.5,-97.6432"/>
+<polygon fill="#191970" stroke="#191970" points="267.0001,-124.2966 270.5,-134.2967 274.0001,-124.2967 267.0001,-124.2966"/>
 </g>
-<!-- Node130&#45;&gt;Node44 -->
-<g id="edge76" class="edge">
-<title>Node130&#45;&gt;Node44</title>
-<path fill="none" stroke="#191970" d="M916.8793,-191.3179C917.5491,-182.3414 918.2825,-172.5143 918.8699,-164.6432"/>
-<polygon fill="#191970" stroke="#191970" points="913.3886,-191.0639 916.1346,-201.2967 920.3692,-191.5849 913.3886,-191.0639"/>
+<!-- Node122&#45;&gt;Node125 -->
+<g id="edge64" class="edge">
+<title>Node122&#45;&gt;Node125</title>
+<path fill="none" stroke="#191970" d="M1249.4665,-193.3509C1256.8417,-183.8482 1265.1734,-173.1132 1271.7471,-164.6432"/>
+<polygon fill="#191970" stroke="#191970" points="1246.6659,-191.2508 1243.2996,-201.2967 1252.1958,-195.5427 1246.6659,-191.2508"/>
 </g>
-<!-- Node131&#45;&gt;Node26 -->
-<g id="edge80" class="edge">
-<title>Node131&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1624.4497,-263.5409C1646.4253,-251.1681 1673.6637,-235.8322 1690.9423,-226.1039"/>
-<polygon fill="#191970" stroke="#191970" points="1622.7026,-260.5079 1615.7059,-268.4639 1626.1369,-266.6076 1622.7026,-260.5079"/>
+<!-- Node131&#45;&gt;Node44 -->
+<g id="edge75" class="edge">
+<title>Node131&#45;&gt;Node44</title>
+<path fill="none" stroke="#191970" d="M1414.5862,-139.9456C1399.4925,-137.8414 1383.4435,-135.7276 1368.5,-134 1192.0641,-113.6026 1146.1242,-124.4917 970.5,-98 965.888,-97.3043 961.1295,-96.4978 956.3569,-95.6253"/>
+<polygon fill="#191970" stroke="#191970" points="1414.4666,-143.4634 1424.8578,-141.396 1415.4454,-136.5321 1414.4666,-143.4634"/>
 </g>
-<!-- Node131&#45;&gt;Node33 -->
+<!-- Node132&#45;&gt;Node26 -->
 <g id="edge79" class="edge">
-<title>Node131&#45;&gt;Node33</title>
-<path fill="none" stroke="#191970" d="M1652.4018,-267.9153C1786.1225,-236.143 1819.5993,-221.5152 1951,-201 2128.2346,-173.3289 2174.8644,-186.1068 2353,-165 2366.2666,-163.4281 2380.4403,-161.5033 2393.9433,-159.5517"/>
-<polygon fill="#191970" stroke="#191970" points="1651.5246,-264.5261 1642.598,-270.2328 1653.135,-271.3383 1651.5246,-264.5261"/>
+<title>Node132&#45;&gt;Node26</title>
+<path fill="none" stroke="#191970" d="M2233.7721,-201.0494C2038.1577,-169.3283 1817.7498,-156.6839 1709.0664,-151.9675"/>
+<polygon fill="#191970" stroke="#191970" points="2233.2223,-204.506 2243.6572,-202.6723 2234.3564,-197.5984 2233.2223,-204.506"/>
 </g>
-<!-- Node131&#45;&gt;Node35 -->
-<g id="edge81" class="edge">
-<title>Node131&#45;&gt;Node35</title>
-<path fill="none" stroke="#191970" d="M1561.0616,-262.1433C1534.9914,-243.2584 1494.3134,-216.3864 1455,-201 1377.8092,-170.7892 1324.6538,-229.021 1272,-165 1229.7394,-113.6159 1331.1466,-94.2125 1409.7187,-86.9007"/>
-<polygon fill="#191970" stroke="#191970" points="1559.1893,-265.1108 1569.3197,-268.2134 1563.3352,-259.4706 1559.1893,-265.1108"/>
+<!-- Node132&#45;&gt;Node33 -->
+<g id="edge78" class="edge">
+<title>Node132&#45;&gt;Node33</title>
+<path fill="none" stroke="#191970" d="M2308.0748,-192.0686C2319.9728,-164.58 2338.934,-120.7732 2348.8943,-97.7614"/>
+<polygon fill="#191970" stroke="#191970" points="2304.8009,-190.8214 2304.0406,-201.389 2311.2249,-193.602 2304.8009,-190.8214"/>
+</g>
+<!-- Node132&#45;&gt;Node35 -->
+<g id="edge80" class="edge">
+<title>Node132&#45;&gt;Node35</title>
+<path fill="none" stroke="#191970" d="M2361.1741,-201.6056C2444.4245,-180.6939 2583.1397,-140.4142 2612.5,-98 2620.3418,-86.6716 2621.6844,-77.27 2612.5,-67 2587.7412,-39.3146 2358.8402,-24.7576 2233.5593,-18.8058"/>
+<polygon fill="#191970" stroke="#191970" points="2360.1677,-198.2492 2351.3077,-204.0586 2361.8567,-205.0423 2360.1677,-198.2492"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/auto__scheduler_2feature_8h_source.html b/docs/reference/api/doxygen/auto__scheduler_2feature_8h_source.html
index d80ea0c32..5829dea74 100644
--- a/docs/reference/api/doxygen/auto__scheduler_2feature_8h_source.html
+++ b/docs/reference/api/doxygen/auto__scheduler_2feature_8h_source.html
@@ -66,7 +66,8 @@ $(function() {
 <div class="title">feature.h</div>  </div>
 </div><!--header-->
 <div class="contents">
-<a href="auto__scheduler_2feature_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="commen [...]
+<a href="auto__scheduler_2feature_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno">    1</span>&#160;<span class="comment">/*</span></div><div class="line"><a name="l00002"></a><span class="lineno">    2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one</span></div><div class="line"><a name="l00003"></a><span class="lineno">    3</span>&#160;<span class="commen [...]
+<div class="ttc" id="namespacetvm_1_1script_1_1ir__builder_1_1tir_html_a713fe9c5ed9b44e16b7e5f9a23622543"><div class="ttname"><a href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html#a713fe9c5ed9b44e16b7e5f9a23622543">tvm::script::ir_builder::tir::PrimFunc</a></div><div class="ttdeci">PrimFuncFrame PrimFunc()</div><div class="ttdoc">The primitive function statement. </div></div>
 <div class="ttc" id="tir_2function_8h_html"><div class="ttname"><a href="tir_2function_8h.html">function.h</a></div><div class="ttdoc">TIR Function. </div></div>
 <div class="ttc" id="measure_8h_html"><div class="ttname"><a href="measure_8h.html">measure.h</a></div><div class="ttdoc">Distributed measurement infrastructure to measure the runtime costs of tensor programs. These functions are responsible for building the tvm module, uploading it to remote devices, recording the running time costs, and checking the correctness of the output. </div></div>
 <div class="ttc" id="namespacetvm_1_1auto__scheduler_html_ab57220e3e6abc9f0bac9c60a634e74df"><div class="ttname"><a href="namespacetvm_1_1auto__scheduler.html#ab57220e3e6abc9f0bac9c60a634e74df">tvm::auto_scheduler::GetPerStoreFeaturesFromMeasurePairs</a></div><div class="ttdeci">void GetPerStoreFeaturesFromMeasurePairs(const Array&lt; MeasureInput &gt; &amp;inputs, const Array&lt; MeasureResult &gt; &amp;results, int skip_first_n_feature_extraction, int max_n_bufs, std::vector&lt; std::v [...]
diff --git a/docs/reference/api/doxygen/c__runtime__api_8h__dep__incl.svg b/docs/reference/api/doxygen/c__runtime__api_8h__dep__incl.svg
index 6514b40f8..fc64aa387 100644
--- a/docs/reference/api/doxygen/c__runtime__api_8h__dep__incl.svg
+++ b/docs/reference/api/doxygen/c__runtime__api_8h__dep__incl.svg
@@ -47,48 +47,48 @@
 <path fill="none" stroke="#191970" d="M1489.7942,-818.327C1256.9775,-814.0592 505.6994,-798.0116 464,-768 427.4253,-741.6768 412.4032,-707.0867 438,-670 491.1883,-592.9365 776.0801,-519.6405 884.8007,-494.0629"/>
 <polygon fill="#191970" stroke="#191970" points="1489.7677,-821.827 1499.8297,-818.5095 1489.895,-814.8282 1489.7677,-821.827"/>
 </g>
-<!-- Node174 -->
+<!-- Node176 -->
 <g id="node20" class="node">
-<title>Node174</title>
+<title>Node176</title>
 <g id="a_node20"><a xlink:href="tir_2expr_8h.html" target="_top" xlink:title="TIR expressions. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="157.5,-140 157.5,-159 278.5,-159 278.5,-140 157.5,-140"/>
 <text text-anchor="middle" x="218" y="-147" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/expr.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node174 -->
+<!-- Node4&#45;&gt;Node176 -->
 <g id="edge121" class="edge">
-<title>Node4&#45;&gt;Node174</title>
+<title>Node4&#45;&gt;Node176</title>
 <path fill="none" stroke="#191970" d="M1489.7755,-818.3853C1231.2208,-813.961 319.8704,-796.4816 193,-768 101.9794,-747.5665 0,-778.786 0,-685.5 0,-685.5 0,-685.5 0,-278 0,-204.632 92.7704,-172.3028 157.2748,-158.6916"/>
 <polygon fill="#191970" stroke="#191970" points="1489.727,-821.8849 1499.7852,-818.5556 1489.8462,-814.8859 1489.727,-821.8849"/>
 </g>
-<!-- Node188 -->
+<!-- Node190 -->
 <g id="node29" class="node">
-<title>Node188</title>
+<title>Node190</title>
 <g id="a_node29"><a xlink:href="reflection_8h.html" target="_top" xlink:title="Reflection and serialization of compiler IR/AST nodes. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="861.5,-542 861.5,-561 1020.5,-561 1020.5,-542 861.5,-542"/>
 <text text-anchor="middle" x="941" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/reflection.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node188 -->
+<!-- Node4&#45;&gt;Node190 -->
 <g id="edge57" class="edge">
-<title>Node4&#45;&gt;Node188</title>
+<title>Node4&#45;&gt;Node190</title>
 <path fill="none" stroke="#191970" d="M1489.6438,-818.9697C1271.756,-816.8874 602.6937,-807.3259 514,-768 475.5024,-750.9305 463.1156,-739.9065 447,-701 441.7275,-688.271 438.265,-680.6549 447,-670 498.7235,-606.9074 547.5117,-652.374 627,-634 729.693,-610.2622 849.9035,-577.1831 907.2441,-561.0759"/>
 <polygon fill="#191970" stroke="#191970" points="1489.642,-822.4698 1499.6742,-819.0633 1489.7073,-815.4701 1489.642,-822.4698"/>
 </g>
-<!-- Node194 -->
+<!-- Node196 -->
 <g id="node31" class="node">
-<title>Node194</title>
+<title>Node196</title>
 <g id="a_node31"><a xlink:href="serialization_8h.html" target="_top" xlink:title="include/tvm/node/serialization.h">
 <polygon fill="#ffffff" stroke="#000000" points="488.5,-676 488.5,-695 661.5,-695 661.5,-676 488.5,-676"/>
 <text text-anchor="middle" x="575" y="-683" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/serialization.h</text>
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node194 -->
+<!-- Node4&#45;&gt;Node196 -->
 <g id="edge61" class="edge">
-<title>Node4&#45;&gt;Node194</title>
+<title>Node4&#45;&gt;Node196</title>
 <path fill="none" stroke="#191970" d="M1489.5909,-816.4183C1336.914,-809.2314 972.2842,-790.2689 851,-768 756.3107,-750.6141 647.9656,-712.7267 600.0422,-695.0007"/>
 <polygon fill="#191970" stroke="#191970" points="1489.6212,-819.9234 1499.7739,-816.8949 1489.9485,-812.9311 1489.6212,-819.9234"/>
 </g>
@@ -108,9 +108,9 @@
 <path fill="none" stroke="#191970" d="M1626.1779,-805.5368C1676.0203,-795.2814 1745.2416,-780.9403 1806,-768 1809.085,-767.343 1812.2445,-766.6664 1815.4347,-765.9805"/>
 <polygon fill="#191970" stroke="#191970" points="1625.2516,-802.154 1616.1615,-807.5964 1626.6615,-809.0105 1625.2516,-802.154"/>
 </g>
-<!-- Node195 -->
+<!-- Node197 -->
 <g id="node33" class="node">
-<title>Node195</title>
+<title>Node197</title>
 <g id="a_node33"><a xlink:href="builtin__fp16_8h.html" target="_top" xlink:title="Functions for conversion between fp32 and fp16. ">
 <polygon fill="#ffffff" stroke="#000000" points="1957,-737.5 1957,-767.5 2073,-767.5 2073,-737.5 1957,-737.5"/>
 <text text-anchor="start" x="1965" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -118,15 +118,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node195 -->
+<!-- Node4&#45;&gt;Node197 -->
 <g id="edge63" class="edge">
-<title>Node4&#45;&gt;Node195</title>
+<title>Node4&#45;&gt;Node197</title>
 <path fill="none" stroke="#191970" d="M1626.4254,-812.3798C1704.3544,-803.8218 1835.3473,-788.0716 1947,-768 1950.2177,-767.4216 1953.513,-766.7927 1956.8339,-766.1305"/>
 <polygon fill="#191970" stroke="#191970" points="1625.8179,-808.9252 1616.2563,-813.4887 1626.5769,-815.8839 1625.8179,-808.9252"/>
 </g>
-<!-- Node196 -->
+<!-- Node198 -->
 <g id="node34" class="node">
-<title>Node196</title>
+<title>Node198</title>
 <g id="a_node34"><a xlink:href="c__backend__api_8h.html" target="_top" xlink:title="TVM runtime backend API. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="2091,-737.5 2091,-767.5 2207,-767.5 2207,-737.5 2091,-737.5"/>
 <text text-anchor="start" x="2099" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -134,15 +134,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node196 -->
+<!-- Node4&#45;&gt;Node198 -->
 <g id="edge64" class="edge">
-<title>Node4&#45;&gt;Node196</title>
+<title>Node4&#45;&gt;Node198</title>
 <path fill="none" stroke="#191970" d="M1626.0861,-815.6303C1726.3954,-809.2922 1919.5734,-794.6551 2082,-768 2084.9454,-767.5166 2087.9545,-766.9799 2090.9867,-766.4047"/>
 <polygon fill="#191970" stroke="#191970" points="1625.8476,-812.1383 1616.0849,-816.2541 1626.2834,-819.1247 1625.8476,-812.1383"/>
 </g>
-<!-- Node200 -->
+<!-- Node202 -->
 <g id="node35" class="node">
-<title>Node200</title>
+<title>Node202</title>
 <g id="a_node35"><a xlink:href="graph__executor_8h.html" target="_top" xlink:title="Tiny AoT executor. ">
 <polygon fill="#ffffff" stroke="#000000" points="2304.5,-603.5 2304.5,-633.5 2423.5,-633.5 2423.5,-603.5 2304.5,-603.5"/>
 <text text-anchor="start" x="2312.5" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -150,15 +150,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node200 -->
+<!-- Node4&#45;&gt;Node202 -->
 <g id="edge65" class="edge">
-<title>Node4&#45;&gt;Node200</title>
+<title>Node4&#45;&gt;Node202</title>
 <path fill="none" stroke="#191970" d="M1626.2715,-817.7835C1820.1927,-812.5781 2362.0458,-795.6145 2388,-768 2423.106,-730.6482 2390.805,-663.4573 2373.4184,-633.5574"/>
 <polygon fill="#191970" stroke="#191970" points="1626.1046,-814.2866 1616.2014,-818.0516 1626.291,-821.2841 1626.1046,-814.2866"/>
 </g>
-<!-- Node199 -->
+<!-- Node201 -->
 <g id="node36" class="node">
-<title>Node199</title>
+<title>Node201</title>
 <g id="a_node36"><a xlink:href="crt_2packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
 <polygon fill="#ffffff" stroke="#000000" points="2263,-670.5 2263,-700.5 2379,-700.5 2379,-670.5 2263,-670.5"/>
 <text text-anchor="start" x="2271" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -166,15 +166,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node199 -->
+<!-- Node4&#45;&gt;Node201 -->
 <g id="edge66" class="edge">
-<title>Node4&#45;&gt;Node199</title>
+<title>Node4&#45;&gt;Node201</title>
 <path fill="none" stroke="#191970" d="M1626.163,-816.4728C1783.4186,-809.1594 2162.2712,-789.5349 2216,-768 2236.2314,-759.8911 2236.9134,-750.5318 2254,-737 2269.6417,-724.6125 2287.5733,-710.852 2300.9472,-700.6735"/>
 <polygon fill="#191970" stroke="#191970" points="1625.9588,-812.9785 1616.1311,-816.9367 1626.2821,-819.971 1625.9588,-812.9785"/>
 </g>
-<!-- Node201 -->
+<!-- Node203 -->
 <g id="node37" class="node">
-<title>Node201</title>
+<title>Node203</title>
 <g id="a_node37"><a xlink:href="page__allocator_8h.html" target="_top" xlink:title="An implementation of a dynamic memory allocator for microcontrollers. ">
 <polygon fill="#ffffff" stroke="#000000" points="2435,-737.5 2435,-767.5 2551,-767.5 2551,-737.5 2435,-737.5"/>
 <text text-anchor="start" x="2443" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -182,15 +182,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node201 -->
+<!-- Node4&#45;&gt;Node203 -->
 <g id="edge68" class="edge">
-<title>Node4&#45;&gt;Node201</title>
+<title>Node4&#45;&gt;Node203</title>
 <path fill="none" stroke="#191970" d="M1626.3199,-816.7715C1802.2609,-809.5286 2267.8125,-788.9353 2421,-768 2425.5607,-767.3767 2430.2625,-766.6222 2434.9756,-765.7857"/>
 <polygon fill="#191970" stroke="#191970" points="1626.0032,-813.2815 1616.155,-817.1884 1626.29,-820.2756 1626.0032,-813.2815"/>
 </g>
-<!-- Node202 -->
+<!-- Node204 -->
 <g id="node38" class="node">
-<title>Node202</title>
+<title>Node204</title>
 <g id="a_node38"><a xlink:href="platform_8h.html" target="_top" xlink:title="The virtual memory manager for micro&#45;controllers. ">
 <polygon fill="#ffffff" stroke="#000000" points="2263,-737.5 2263,-767.5 2379,-767.5 2379,-737.5 2263,-737.5"/>
 <text text-anchor="start" x="2271" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -198,15 +198,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node202 -->
+<!-- Node4&#45;&gt;Node204 -->
 <g id="edge69" class="edge">
-<title>Node4&#45;&gt;Node202</title>
+<title>Node4&#45;&gt;Node204</title>
 <path fill="none" stroke="#191970" d="M1626.2139,-817.5278C1750.5859,-813.242 2022.3006,-800.691 2249,-768 2253.556,-767.343 2258.2544,-766.5643 2262.9651,-765.711"/>
 <polygon fill="#191970" stroke="#191970" points="1625.9545,-814.0345 1616.0783,-817.8699 1626.1907,-821.0305 1625.9545,-814.0345"/>
 </g>
-<!-- Node203 -->
+<!-- Node205 -->
 <g id="node39" class="node">
-<title>Node203</title>
+<title>Node205</title>
 <g id="a_node39"><a xlink:href="data__type_8h.html" target="_top" xlink:title="include/tvm/runtime\l/data_type.h">
 <polygon fill="#ffffff" stroke="#ff0000" points="202,-737.5 202,-767.5 318,-767.5 318,-737.5 202,-737.5"/>
 <text text-anchor="start" x="210" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -214,15 +214,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node203 -->
+<!-- Node4&#45;&gt;Node205 -->
 <g id="edge71" class="edge">
-<title>Node4&#45;&gt;Node203</title>
+<title>Node4&#45;&gt;Node205</title>
 <path fill="none" stroke="#191970" d="M1489.5664,-818.1584C1266.0139,-813.535 558.6086,-796.9377 332,-768 327.434,-767.4169 322.7283,-766.6913 318.0126,-765.8748"/>
 <polygon fill="#191970" stroke="#191970" points="1489.7723,-821.6633 1499.8421,-818.3696 1489.9162,-814.6648 1489.7723,-821.6633"/>
 </g>
-<!-- Node206 -->
+<!-- Node208 -->
 <g id="node40" class="node">
-<title>Node206</title>
+<title>Node208</title>
 <g id="a_node40"><a xlink:href="ndarray_8h.html" target="_top" xlink:title="A device&#45;independent managed NDArray abstraction. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="1014,-670.5 1014,-700.5 1130,-700.5 1130,-670.5 1014,-670.5"/>
 <text text-anchor="start" x="1022" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -230,15 +230,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node206 -->
+<!-- Node4&#45;&gt;Node208 -->
 <g id="edge100" class="edge">
-<title>Node4&#45;&gt;Node206</title>
+<title>Node4&#45;&gt;Node208</title>
 <path fill="none" stroke="#191970" d="M1489.6894,-813.4807C1426.2466,-806.6398 1329.6992,-793.0212 1249,-768 1193.1404,-750.6804 1132.1945,-719.243 1098.3796,-700.5776"/>
 <polygon fill="#191970" stroke="#191970" points="1489.6577,-816.9966 1499.9687,-814.561 1490.3893,-810.0349 1489.6577,-816.9966"/>
 </g>
-<!-- Node208 -->
+<!-- Node210 -->
 <g id="node41" class="node">
-<title>Node208</title>
+<title>Node210</title>
 <g id="a_node41"><a xlink:href="device__api_8h.html" target="_top" xlink:title="Abstract device memory management API. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="1327,-536.5 1327,-566.5 1443,-566.5 1443,-536.5 1327,-536.5"/>
 <text text-anchor="start" x="1335" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -246,15 +246,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node208 -->
+<!-- Node4&#45;&gt;Node210 -->
 <g id="edge95" class="edge">
-<title>Node4&#45;&gt;Node208</title>
+<title>Node4&#45;&gt;Node210</title>
 <path fill="none" stroke="#191970" d="M1550.5323,-794.9129C1536.7167,-752.3471 1503.9449,-663.6728 1454,-603 1442.0298,-588.4587 1425.1276,-575.7557 1411.0481,-566.604"/>
 <polygon fill="#191970" stroke="#191970" points="1547.2011,-795.9871 1553.5621,-804.46 1553.8732,-793.8697 1547.2011,-795.9871"/>
 </g>
-<!-- Node209 -->
+<!-- Node211 -->
 <g id="node42" class="node">
-<title>Node209</title>
+<title>Node211</title>
 <g id="a_node42"><a xlink:href="profiling_8h.html" target="_top" xlink:title="Runtime profiling including timers. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="1318,-469.5 1318,-499.5 1434,-499.5 1434,-469.5 1318,-469.5"/>
 <text text-anchor="start" x="1326" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -262,15 +262,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node209 -->
+<!-- Node4&#45;&gt;Node211 -->
 <g id="edge117" class="edge">
-<title>Node4&#45;&gt;Node209</title>
+<title>Node4&#45;&gt;Node211</title>
 <path fill="none" stroke="#191970" d="M1563.2316,-794.0743C1569.9357,-763.7705 1582.8931,-711.9611 1601,-670 1626.9259,-609.919 1703.9705,-586.2031 1662,-536 1633.5642,-501.9865 1509.3762,-490.4024 1434.3104,-486.4822"/>
 <polygon fill="#191970" stroke="#191970" points="1559.7331,-793.6947 1561.0466,-804.2078 1566.5758,-795.1702 1559.7331,-793.6947"/>
 </g>
-<!-- Node211 -->
+<!-- Node213 -->
 <g id="node43" class="node">
-<title>Node211</title>
+<title>Node213</title>
 <g id="a_node43"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="1329,-603.5 1329,-633.5 1445,-633.5 1445,-603.5 1329,-603.5"/>
 <text text-anchor="start" x="1337" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -278,15 +278,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node211 -->
+<!-- Node4&#45;&gt;Node213 -->
 <g id="edge116" class="edge">
-<title>Node4&#45;&gt;Node211</title>
+<title>Node4&#45;&gt;Node213</title>
 <path fill="none" stroke="#191970" d="M1512.6297,-799.8571C1497.169,-791.6181 1480.6202,-780.9167 1468,-768 1428.1844,-727.2492 1402.3649,-662.971 1392.0682,-633.802"/>
 <polygon fill="#191970" stroke="#191970" points="1511.0713,-802.9912 1521.5692,-804.4217 1514.2547,-796.7569 1511.0713,-802.9912"/>
 </g>
-<!-- Node212 -->
+<!-- Node214 -->
 <g id="node44" class="node">
-<title>Node212</title>
+<title>Node214</title>
 <g id="a_node44"><a xlink:href="runtime_2module_8h.html" target="_top" xlink:title="Runtime container of the functions generated by TVM, This is used to support dynamically link...">
 <polygon fill="#ffffff" stroke="#ff0000" points="1537,-536.5 1537,-566.5 1653,-566.5 1653,-536.5 1537,-536.5"/>
 <text text-anchor="start" x="1545" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -294,15 +294,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node212 -->
+<!-- Node4&#45;&gt;Node214 -->
 <g id="edge99" class="edge">
-<title>Node4&#45;&gt;Node212</title>
+<title>Node4&#45;&gt;Node214</title>
 <path fill="none" stroke="#191970" d="M1557.0717,-793.7247C1556.4068,-763.7999 1556.5568,-713.0424 1563,-670 1568.6787,-632.065 1582.192,-588.8915 1589.7151,-566.6006"/>
 <polygon fill="#191970" stroke="#191970" points="1553.5836,-794.2133 1557.355,-804.1141 1560.581,-794.0224 1553.5836,-794.2133"/>
 </g>
-<!-- Node217 -->
+<!-- Node219 -->
 <g id="node45" class="node">
-<title>Node217</title>
+<title>Node219</title>
 <g id="a_node45"><a xlink:href="serializer_8h.html" target="_top" xlink:title="Serializer extension to support TVM data types Include this file to enable serialization of DLDataTyp...">
 <polygon fill="#ffffff" stroke="#000000" points="1119,-603.5 1119,-633.5 1235,-633.5 1235,-603.5 1119,-603.5"/>
 <text text-anchor="start" x="1127" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -310,15 +310,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node217 -->
+<!-- Node4&#45;&gt;Node219 -->
 <g id="edge118" class="edge">
-<title>Node4&#45;&gt;Node217</title>
+<title>Node4&#45;&gt;Node219</title>
 <path fill="none" stroke="#191970" d="M1511.4471,-800.5202C1489.5206,-791.2419 1463.1559,-779.5941 1440,-768 1350.0655,-722.97 1247.5135,-661.6528 1201.5459,-633.6131"/>
 <polygon fill="#191970" stroke="#191970" points="1510.2898,-803.8303 1520.865,-804.4757 1513.0005,-797.3764 1510.2898,-803.8303"/>
 </g>
-<!-- Node218 -->
+<!-- Node220 -->
 <g id="node46" class="node">
-<title>Node218</title>
+<title>Node220</title>
 <g id="a_node46"><a xlink:href="memory__manager_8h.html" target="_top" xlink:title="Abstract device memory management API. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="849.5,-603.5 849.5,-633.5 986.5,-633.5 986.5,-603.5 849.5,-603.5"/>
 <text text-anchor="start" x="857.5" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -326,15 +326,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node218 -->
+<!-- Node4&#45;&gt;Node220 -->
 <g id="edge119" class="edge">
-<title>Node4&#45;&gt;Node218</title>
+<title>Node4&#45;&gt;Node220</title>
 <path fill="none" stroke="#191970" d="M1489.5912,-817.0723C1317.8091,-810.6865 871.5715,-792.2295 725,-768 613.2014,-749.5187 547.7975,-791.0411 479,-701 414.679,-616.8177 351.8214,-687.7094 797,-634 813.926,-631.9579 832.1718,-629.6675 849.2335,-627.4868"/>
 <polygon fill="#191970" stroke="#191970" points="1489.648,-820.5767 1499.7705,-817.4485 1489.9066,-813.5814 1489.648,-820.5767"/>
 </g>
-<!-- Node166 -->
+<!-- Node168 -->
 <g id="node47" class="node">
-<title>Node166</title>
+<title>Node168</title>
 <g id="a_node47"><a xlink:href="metadata_8h.html" target="_top" xlink:title="Defines types which can be used in Metadata. ">
 <polygon fill="#ffffff" stroke="#000000" points="1643,-670.5 1643,-700.5 1759,-700.5 1759,-670.5 1643,-670.5"/>
 <text text-anchor="start" x="1651" y="-688.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -342,15 +342,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node166 -->
+<!-- Node4&#45;&gt;Node168 -->
 <g id="edge96" class="edge">
-<title>Node4&#45;&gt;Node166</title>
+<title>Node4&#45;&gt;Node168</title>
 <path fill="none" stroke="#191970" d="M1590.134,-798.5005C1602.7463,-789.678 1617.0353,-778.9336 1629,-768 1652.436,-746.5838 1675.9351,-718.001 1689.4417,-700.7155"/>
 <polygon fill="#191970" stroke="#191970" points="1587.9808,-795.7333 1581.7195,-804.2801 1591.944,-801.5034 1587.9808,-795.7333"/>
 </g>
-<!-- Node220 -->
+<!-- Node222 -->
 <g id="node48" class="node">
-<title>Node220</title>
+<title>Node222</title>
 <g id="a_node48"><a xlink:href="metadata__types_8h.html" target="_top" xlink:title="Defines types which can be used in metadata here which are also shared between C and C++ code bases...">
 <polygon fill="#ffffff" stroke="#ff0000" points="1681,-737.5 1681,-767.5 1797,-767.5 1797,-737.5 1681,-737.5"/>
 <text text-anchor="start" x="1689" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -358,15 +358,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node220 -->
+<!-- Node4&#45;&gt;Node222 -->
 <g id="edge97" class="edge">
-<title>Node4&#45;&gt;Node220</title>
+<title>Node4&#45;&gt;Node222</title>
 <path fill="none" stroke="#191970" d="M1608.3423,-800.865C1636.6747,-790.3773 1671.5938,-777.4515 1698.2029,-767.6017"/>
 <polygon fill="#191970" stroke="#191970" points="1606.783,-797.71 1598.6199,-804.4639 1609.2131,-804.2747 1606.783,-797.71"/>
 </g>
-<!-- Node222 -->
+<!-- Node224 -->
 <g id="node49" class="node">
-<title>Node222</title>
+<title>Node224</title>
 <g id="a_node49"><a xlink:href="object_8h.html" target="_top" xlink:title="A managed object in the TVM runtime. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="860,-737.5 860,-767.5 976,-767.5 976,-737.5 860,-737.5"/>
 <text text-anchor="start" x="868" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
@@ -374,15 +374,15 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node222 -->
+<!-- Node4&#45;&gt;Node224 -->
 <g id="edge101" class="edge">
-<title>Node4&#45;&gt;Node222</title>
+<title>Node4&#45;&gt;Node224</title>
 <path fill="none" stroke="#191970" d="M1489.8393,-812.3644C1363.4117,-799.129 1094.6799,-770.9962 976.1898,-758.5917"/>
 <polygon fill="#191970" stroke="#191970" points="1489.4818,-815.846 1499.7919,-813.4063 1490.2107,-808.8841 1489.4818,-815.846"/>
 </g>
-<!-- Node236 -->
+<!-- Node238 -->
 <g id="node50" class="node">
-<title>Node236</title>
+<title>Node238</title>
 <g id="a_node50"><a xlink:href="parallel__for_8h.html" target="_top" xlink:title="An implementation to run loop in parallel. ">
 <polygon fill="#ffffff" stroke="#000000" points="2569,-737.5 2569,-767.5 2683,-767.5 2683,-737.5 2569,-737.5"/>
 <text text-anchor="start" x="2577" y="-755.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/support</text>
@@ -390,9 +390,9 @@
 </a>
 </g>
 </g>
-<!-- Node4&#45;&gt;Node236 -->
+<!-- Node4&#45;&gt;Node238 -->
 <g id="edge120" class="edge">
-<title>Node4&#45;&gt;Node236</title>
+<title>Node4&#45;&gt;Node238</title>
 <path fill="none" stroke="#191970" d="M1626.2523,-817.5884C1821.3569,-811.8702 2379.0302,-793.6891 2560,-768 2562.9133,-767.5864 2565.887,-767.1058 2568.8816,-766.5752"/>
 <polygon fill="#191970" stroke="#191970" points="1626.0258,-814.0934 1616.132,-817.8833 1626.2297,-821.0904 1626.0258,-814.0934"/>
 </g>
@@ -503,72 +503,72 @@
 <path fill="none" stroke="#191970" d="M848.4287,-478.4497C742.6931,-469.6356 565.1108,-452.2082 542,-433 525.1164,-418.9674 484.1223,-266.4615 473.531,-226.1997"/>
 <polygon fill="#191970" stroke="#191970" points="848.1752,-481.9405 858.4293,-479.2755 848.7513,-474.9643 848.1752,-481.9405"/>
 </g>
-<!-- Node139 -->
+<!-- Node140 -->
 <g id="node10" class="node">
-<title>Node139</title>
+<title>Node140</title>
 <g id="a_node10"><a xlink:href="ir_2expr_8h.html" target="_top" xlink:title="Base expr nodes in TVM. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="592.5,-268.5 592.5,-287.5 709.5,-287.5 709.5,-268.5 592.5,-268.5"/>
 <text text-anchor="middle" x="651" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/expr.h</text>
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node139 -->
+<!-- Node13&#45;&gt;Node140 -->
 <g id="edge11" class="edge">
-<title>Node13&#45;&gt;Node139</title>
+<title>Node13&#45;&gt;Node140</title>
 <path fill="none" stroke="#191970" d="M933.7839,-465.0452C939.5487,-444.7585 944.7714,-412.9196 930,-391 880.8631,-318.0845 775.8012,-292.203 709.5496,-283.0272"/>
 <polygon fill="#191970" stroke="#191970" points="930.3966,-464.1498 930.7361,-474.7392 937.0743,-466.2493 930.3966,-464.1498"/>
 </g>
-<!-- Node147 -->
+<!-- Node148 -->
 <g id="node11" class="node">
-<title>Node147</title>
+<title>Node148</title>
 <g id="a_node11"><a xlink:href="script_2ir__builder_2base_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/base.h">
-<polygon fill="#ffffff" stroke="#000000" points="1020,-201.5 1020,-231.5 1124,-231.5 1124,-201.5 1020,-201.5"/>
+<polygon fill="#ffffff" stroke="#ff0000" points="1020,-201.5 1020,-231.5 1124,-231.5 1124,-201.5 1020,-201.5"/>
 <text text-anchor="start" x="1028" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
 <text text-anchor="middle" x="1072" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/base.h</text>
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node147 -->
+<!-- Node13&#45;&gt;Node148 -->
 <g id="edge41" class="edge">
-<title>Node13&#45;&gt;Node147</title>
+<title>Node13&#45;&gt;Node148</title>
 <path fill="none" stroke="#191970" d="M975.0117,-472.4029C1020.5166,-460.4557 1082.9524,-442.6177 1090,-433 1135.5894,-370.7852 1096.8563,-269.8139 1079.4273,-231.7066"/>
 <polygon fill="#191970" stroke="#191970" points="974.0925,-469.0255 965.2969,-474.9321 975.8562,-475.7997 974.0925,-469.0255"/>
 </g>
-<!-- Node148 -->
+<!-- Node149 -->
 <g id="node12" class="node">
-<title>Node148</title>
+<title>Node149</title>
 <g id="a_node12"><a xlink:href="ir__builder_2ir_2frame_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/frame.h">
-<polygon fill="#ffffff" stroke="#000000" points="1014,-134.5 1014,-164.5 1130,-164.5 1130,-134.5 1014,-134.5"/>
+<polygon fill="#ffffff" stroke="#ff0000" points="1014,-134.5 1014,-164.5 1130,-164.5 1130,-134.5 1014,-134.5"/>
 <text text-anchor="start" x="1022" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
 <text text-anchor="middle" x="1072" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/frame.h</text>
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node148 -->
+<!-- Node13&#45;&gt;Node149 -->
 <g id="edge42" class="edge">
-<title>Node13&#45;&gt;Node148</title>
+<title>Node13&#45;&gt;Node149</title>
 <path fill="none" stroke="#191970" d="M979.0777,-472.5473C1030.4601,-460.3638 1102.7824,-442.0136 1113,-433 1146.7652,-403.2137 1152,-384.5256 1152,-339.5 1152,-339.5 1152,-339.5 1152,-278 1152,-242.7513 1151.5975,-230.9433 1133,-201 1123.8593,-186.2828 1109.1015,-173.6593 1096.3914,-164.5937"/>
 <polygon fill="#191970" stroke="#191970" points="978.0357,-469.1969 969.105,-474.8972 979.6412,-476.0103 978.0357,-469.1969"/>
 </g>
-<!-- Node149 -->
+<!-- Node150 -->
 <g id="node13" class="node">
-<title>Node149</title>
-<g id="a_node13"><a xlink:href="ir_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/ir.h">
+<title>Node150</title>
+<g id="a_node13"><a xlink:href="ir_2ir_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/ir.h">
 <polygon fill="#ffffff" stroke="#000000" points="1020,-67.5 1020,-97.5 1124,-97.5 1124,-67.5 1020,-67.5"/>
 <text text-anchor="start" x="1028" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
 <text text-anchor="middle" x="1072" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/ir.h</text>
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node149 -->
+<!-- Node13&#45;&gt;Node150 -->
 <g id="edge43" class="edge">
-<title>Node13&#45;&gt;Node149</title>
+<title>Node13&#45;&gt;Node150</title>
 <path fill="none" stroke="#191970" d="M981.5395,-472.7708C1043.6872,-459.2247 1137.5574,-438.1389 1144,-433 1180.2055,-404.121 1190,-385.8124 1190,-339.5 1190,-339.5 1190,-339.5 1190,-216.5 1190,-161.9431 1134.2266,-119.066 1099.3682,-97.5956"/>
 <polygon fill="#191970" stroke="#191970" points="980.5037,-469.4141 971.4756,-474.959 981.991,-476.2543 980.5037,-469.4141"/>
 </g>
-<!-- Node167 -->
+<!-- Node169 -->
 <g id="node14" class="node">
-<title>Node167</title>
+<title>Node169</title>
 <g id="a_node14"><a xlink:href="doc_8h.html" target="_top" xlink:title="include/tvm/script\l/printer/doc.h">
 <polygon fill="#ffffff" stroke="#ff0000" points="681,-201.5 681,-231.5 785,-231.5 785,-201.5 681,-201.5"/>
 <text text-anchor="start" x="689" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
@@ -576,15 +576,15 @@
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node167 -->
+<!-- Node13&#45;&gt;Node169 -->
 <g id="edge44" class="edge">
-<title>Node13&#45;&gt;Node167</title>
+<title>Node13&#45;&gt;Node169</title>
 <path fill="none" stroke="#191970" d="M937.7007,-466.0571C947.3794,-446.8669 958.7386,-416.1623 949,-391 917.4182,-309.4003 822.0441,-255.7924 769.208,-231.5017"/>
 <polygon fill="#191970" stroke="#191970" points="934.5706,-464.488 932.9133,-474.9524 940.7345,-467.8055 934.5706,-464.488"/>
 </g>
-<!-- Node169 -->
+<!-- Node171 -->
 <g id="node15" class="node">
-<title>Node169</title>
+<title>Node171</title>
 <g id="a_node15"><a xlink:href="printer_2frame_8h.html" target="_top" xlink:title="include/tvm/script\l/printer/frame.h">
 <polygon fill="#ffffff" stroke="#000000" points="784,-134.5 784,-164.5 888,-164.5 888,-134.5 784,-134.5"/>
 <text text-anchor="start" x="792" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
@@ -592,15 +592,15 @@
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node169 -->
+<!-- Node13&#45;&gt;Node171 -->
 <g id="edge45" class="edge">
-<title>Node13&#45;&gt;Node169</title>
+<title>Node13&#45;&gt;Node171</title>
 <path fill="none" stroke="#191970" d="M943.8124,-467.5718C951.9052,-458.2178 960.7564,-445.8984 965,-433 970.8337,-415.2683 969.1244,-409.2053 965,-391 943.7848,-297.355 875.7319,-201.1309 847.9661,-164.703"/>
 <polygon fill="#191970" stroke="#191970" points="941.2087,-465.2325 937.0305,-474.9687 946.3683,-469.9631 941.2087,-465.2325"/>
 </g>
-<!-- Node170 -->
+<!-- Node172 -->
 <g id="node16" class="node">
-<title>Node170</title>
+<title>Node172</title>
 <g id="a_node16"><a xlink:href="ir__docsifier_8h.html" target="_top" xlink:title="include/tvm/script\l/printer/ir_docsifier.h">
 <polygon fill="#ffffff" stroke="#000000" points="698,-.5 698,-30.5 814,-30.5 814,-.5 698,-.5"/>
 <text text-anchor="start" x="706" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
@@ -608,15 +608,15 @@
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node170 -->
+<!-- Node13&#45;&gt;Node172 -->
 <g id="edge46" class="edge">
-<title>Node13&#45;&gt;Node170</title>
+<title>Node13&#45;&gt;Node172</title>
 <path fill="none" stroke="#191970" d="M848.2999,-481.9234C684.6766,-476.0514 318.8064,-460.0523 269,-433 170.1066,-379.2862 151.1357,-338.58 115,-232 83.0811,-137.8572 171.3487,-97.3779 266,-67 345.0027,-41.6444 585.664,-25.143 697.6005,-18.6373"/>
 <polygon fill="#191970" stroke="#191970" points="848.1886,-485.4216 858.3066,-482.2789 848.4372,-478.426 848.1886,-485.4216"/>
 </g>
-<!-- Node171 -->
+<!-- Node173 -->
 <g id="node17" class="node">
-<title>Node171</title>
+<title>Node173</title>
 <g id="a_node17"><a xlink:href="var__table_8h.html" target="_top" xlink:title="include/tvm/script\l/printer/var_table.h">
 <polygon fill="#ffffff" stroke="#000000" points="784,-67.5 784,-97.5 894,-97.5 894,-67.5 784,-67.5"/>
 <text text-anchor="start" x="792" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
@@ -624,9 +624,9 @@
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node171 -->
+<!-- Node13&#45;&gt;Node173 -->
 <g id="edge49" class="edge">
-<title>Node13&#45;&gt;Node171</title>
+<title>Node13&#45;&gt;Node173</title>
 <path fill="none" stroke="#191970" d="M951.03,-468.6896C962.4032,-459.729 974.8237,-447.4251 981,-433 988.3472,-415.8401 983.4426,-409.5062 981,-391 971.4167,-318.3932 907.9245,-150.3334 897,-134 887.4173,-119.6727 872.9173,-106.7977 860.813,-97.5046"/>
 <polygon fill="#191970" stroke="#191970" points="948.6245,-466.115 942.6578,-474.87 952.7818,-471.7468 948.6245,-466.115"/>
 </g>
@@ -662,75 +662,75 @@
 <path fill="none" stroke="#191970" d="M983.7231,-472.9443C990.5514,-471.5939 997.4302,-470.2524 1004,-469 1092.7198,-452.0875 1137.8428,-494.4855 1204,-433 1235.4261,-403.7931 1228,-382.4027 1228,-339.5 1228,-339.5 1228,-339.5 1228,-278 1228,-242.7513 1230.896,-232.3549 1247,-201 1253.9845,-187.401 1265.2218,-174.333 1274.5916,-164.7635"/>
 <polygon fill="#191970" stroke="#191970" points="982.9214,-469.5351 973.797,-474.9199 984.2879,-476.4004 982.9214,-469.5351"/>
 </g>
-<!-- Node13&#45;&gt;Node174 -->
+<!-- Node13&#45;&gt;Node176 -->
 <g id="edge55" class="edge">
-<title>Node13&#45;&gt;Node174</title>
+<title>Node13&#45;&gt;Node176</title>
 <path fill="none" stroke="#191970" d="M848.1399,-481.5912C701.9631,-475.581 400.1796,-460.0432 361,-433 266.61,-367.8486 302.0133,-302.1768 248,-201 240.1057,-186.2125 230.1891,-169.5172 223.9709,-159.2503"/>
 <polygon fill="#191970" stroke="#191970" points="848.2561,-485.0987 858.39,-482.0075 848.5403,-478.1045 848.2561,-485.0987"/>
 </g>
-<!-- Node175 -->
+<!-- Node177 -->
 <g id="node21" class="node">
-<title>Node175</title>
+<title>Node177</title>
 <g id="a_node21"><a xlink:href="relay_2base_8h.html" target="_top" xlink:title="Base classes for the Relay IR. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="275.5,-73 275.5,-92 412.5,-92 412.5,-73 275.5,-73"/>
 <text text-anchor="middle" x="344" y="-80" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/base.h</text>
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node175 -->
+<!-- Node13&#45;&gt;Node177 -->
 <g id="edge40" class="edge">
-<title>Node13&#45;&gt;Node175</title>
+<title>Node13&#45;&gt;Node177</title>
 <path fill="none" stroke="#191970" d="M848.3186,-479.0729C768.1556,-472.2451 650.4531,-458.2371 613,-433 572.6392,-405.8036 556,-388.1687 556,-339.5 556,-339.5 556,-339.5 556,-216.5 556,-141.4999 460.0214,-106.7619 397.2229,-92.06"/>
 <polygon fill="#191970" stroke="#191970" points="848.1335,-482.5695 858.3891,-479.9099 848.7134,-475.5935 848.1335,-482.5695"/>
 </g>
-<!-- Node181 -->
+<!-- Node183 -->
 <g id="node22" class="node">
-<title>Node181</title>
+<title>Node183</title>
 <g id="a_node22"><a xlink:href="var_8h.html" target="_top" xlink:title="Variables in the TIR. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="124.5,-207 124.5,-226 239.5,-226 239.5,-207 124.5,-207"/>
 <text text-anchor="middle" x="182" y="-214" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/var.h</text>
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node181 -->
+<!-- Node13&#45;&gt;Node183 -->
 <g id="edge56" class="edge">
-<title>Node13&#45;&gt;Node181</title>
+<title>Node13&#45;&gt;Node183</title>
 <path fill="none" stroke="#191970" d="M848.1749,-480.8413C696.2451,-473.3358 373.7642,-455.0354 328,-433 238.5461,-389.9281 195.2955,-262.109 184.662,-226.0611"/>
 <polygon fill="#191970" stroke="#191970" points="848.2125,-484.3472 858.372,-481.3415 848.5555,-477.3557 848.2125,-484.3472"/>
 </g>
-<!-- Node183 -->
+<!-- Node185 -->
 <g id="node23" class="node">
-<title>Node183</title>
+<title>Node185</title>
 <g id="a_node23"><a xlink:href="ir_2span_8h.html" target="_top" xlink:title="Span information for debugging purposes. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="370.5,-402.5 370.5,-421.5 489.5,-421.5 489.5,-402.5 370.5,-402.5"/>
 <text text-anchor="middle" x="430" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/span.h</text>
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node183 -->
+<!-- Node13&#45;&gt;Node185 -->
 <g id="edge32" class="edge">
-<title>Node13&#45;&gt;Node183</title>
+<title>Node13&#45;&gt;Node185</title>
 <path fill="none" stroke="#191970" d="M848.2057,-478.5909C762.8112,-471.3325 622.8203,-456.9418 504,-433 489.2067,-430.0192 473.055,-425.5783 459.6264,-421.547"/>
 <polygon fill="#191970" stroke="#191970" points="848.0864,-482.0931 858.3439,-479.4409 848.6713,-475.1176 848.0864,-482.0931"/>
 </g>
-<!-- Node184 -->
+<!-- Node186 -->
 <g id="node24" class="node">
-<title>Node184</title>
+<title>Node186</title>
 <g id="a_node24"><a xlink:href="ir_2type_8h.html" target="_top" xlink:title="IR/AST nodes for the unified type system in TVM. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="372,-330 372,-349 490,-349 490,-330 372,-330"/>
 <text text-anchor="middle" x="431" y="-337" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type.h</text>
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node184 -->
+<!-- Node13&#45;&gt;Node186 -->
 <g id="edge38" class="edge">
-<title>Node13&#45;&gt;Node184</title>
+<title>Node13&#45;&gt;Node186</title>
 <path fill="none" stroke="#191970" d="M928.4439,-464.7766C928.8345,-443.2285 925.7458,-409.3415 905,-391 874.4579,-363.9974 610.2294,-348.0563 490.0589,-342.1427"/>
 <polygon fill="#191970" stroke="#191970" points="924.9472,-464.6263 927.9976,-474.7725 931.9403,-464.9386 924.9472,-464.6263"/>
 </g>
-<!-- Node128 -->
+<!-- Node129 -->
 <g id="node25" class="node">
-<title>Node128</title>
+<title>Node129</title>
 <g id="a_node25"><a xlink:href="arg__info_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/arg_info.h">
 <polygon fill="#ffffff" stroke="#ff0000" points="1370,-397 1370,-427 1522,-427 1522,-397 1370,-397"/>
 <text text-anchor="start" x="1378" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
@@ -738,15 +738,15 @@
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node128 -->
+<!-- Node13&#45;&gt;Node129 -->
 <g id="edge39" class="edge">
-<title>Node13&#45;&gt;Node128</title>
+<title>Node13&#45;&gt;Node129</title>
 <path fill="none" stroke="#191970" d="M980.3361,-473.0689C988.2481,-471.5753 996.3213,-470.1627 1004,-469 1159.488,-445.4556 1200.8902,-458.9196 1356,-433 1365.8117,-431.3604 1376.1497,-429.2833 1386.1837,-427.0766"/>
 <polygon fill="#191970" stroke="#191970" points="979.5908,-469.6482 970.4421,-474.9916 980.9262,-476.5196 979.5908,-469.6482"/>
 </g>
-<!-- Node186 -->
+<!-- Node188 -->
 <g id="node26" class="node">
-<title>Node186</title>
+<title>Node188</title>
 <g id="a_node26"><a xlink:href="traced__object__functor_8h.html" target="_top" xlink:title="include/tvm/script\l/printer/traced_object\l_functor.h">
 <polygon fill="#ffffff" stroke="#000000" points="1578.5,-391.5 1578.5,-432.5 1699.5,-432.5 1699.5,-391.5 1578.5,-391.5"/>
 <text text-anchor="start" x="1586.5" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
@@ -755,15 +755,15 @@
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node186 -->
+<!-- Node13&#45;&gt;Node188 -->
 <g id="edge47" class="edge">
-<title>Node13&#45;&gt;Node186</title>
+<title>Node13&#45;&gt;Node188</title>
 <path fill="none" stroke="#191970" d="M979.1504,-473.0474C987.4334,-471.5047 995.929,-470.0833 1004,-469 1236.6815,-437.769 1298.2168,-463.4634 1531,-433 1546.4358,-430.98 1562.9895,-428.1177 1578.3847,-425.1543"/>
 <polygon fill="#191970" stroke="#191970" points="978.3477,-469.6377 969.1954,-474.9749 979.6784,-476.5101 978.3477,-469.6377"/>
 </g>
-<!-- Node187 -->
+<!-- Node189 -->
 <g id="node27" class="node">
-<title>Node187</title>
+<title>Node189</title>
 <g id="a_node27"><a xlink:href="printer_8h.html" target="_top" xlink:title="include/tvm/script\l/printer.h">
 <polygon fill="#ffffff" stroke="#000000" points="792,-397 792,-427 896,-427 896,-397 792,-397"/>
 <text text-anchor="start" x="800" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
@@ -771,9 +771,9 @@
 </a>
 </g>
 </g>
-<!-- Node13&#45;&gt;Node187 -->
+<!-- Node13&#45;&gt;Node189 -->
 <g id="edge50" class="edge">
-<title>Node13&#45;&gt;Node187</title>
+<title>Node13&#45;&gt;Node189</title>
 <path fill="none" stroke="#191970" d="M908.1877,-468.0676C894.1309,-455.7891 875.0604,-439.1311 861.3648,-427.168"/>
 <polygon fill="#191970" stroke="#191970" points="905.9723,-470.7797 915.8063,-474.7223 910.5774,-465.5077 905.9723,-470.7797"/>
 </g>
@@ -793,99 +793,99 @@
 <path fill="none" stroke="#191970" d="M981.7545,-472.996C989.2155,-471.5697 996.7859,-470.1951 1004,-469 1126.0424,-448.7819 1173.6336,-496.1608 1280,-433 1310.8923,-414.656 1331.8293,-376.2522 1341.6294,-354.8094"/>
 <polygon fill="#191970" stroke="#191970" points="980.8357,-469.6093 971.6921,-474.9612 982.1775,-476.4795 980.8357,-469.6093"/>
 </g>
-<!-- Node139&#45;&gt;Node16 -->
+<!-- Node140&#45;&gt;Node16 -->
 <g id="edge12" class="edge">
-<title>Node139&#45;&gt;Node16</title>
+<title>Node140&#45;&gt;Node16</title>
 <path fill="none" stroke="#191970" d="M613.2464,-265.1009C579.107,-253.4366 529.625,-236.5302 498.9275,-226.0419"/>
 <polygon fill="#191970" stroke="#191970" points="612.2805,-268.4694 622.875,-268.3906 614.5437,-261.8454 612.2805,-268.4694"/>
 </g>
-<!-- Node139&#45;&gt;Node147 -->
+<!-- Node140&#45;&gt;Node148 -->
 <g id="edge13" class="edge">
-<title>Node139&#45;&gt;Node147</title>
+<title>Node140&#45;&gt;Node148</title>
 <path fill="none" stroke="#191970" d="M719.9669,-271.1669C791.7321,-263.5653 907.2162,-249.9516 1006,-232 1010.5164,-231.1793 1015.1884,-230.2489 1019.8639,-229.2613"/>
 <polygon fill="#191970" stroke="#191970" points="719.2828,-267.7194 709.703,-272.2446 720.0138,-274.6811 719.2828,-267.7194"/>
 </g>
-<!-- Node139&#45;&gt;Node148 -->
+<!-- Node140&#45;&gt;Node149 -->
 <g id="edge16" class="edge">
-<title>Node139&#45;&gt;Node148</title>
+<title>Node140&#45;&gt;Node149</title>
 <path fill="none" stroke="#191970" d="M720.1081,-272.2067C799.5899,-264.8068 924.6922,-250.6874 968,-232 977.681,-227.8226 1026.6654,-187.46 1053.8146,-164.7803"/>
 <polygon fill="#191970" stroke="#191970" points="719.3282,-268.7635 709.6898,-273.1626 719.9678,-275.7342 719.3282,-268.7635"/>
 </g>
-<!-- Node139&#45;&gt;Node149 -->
+<!-- Node140&#45;&gt;Node150 -->
 <g id="edge17" class="edge">
-<title>Node139&#45;&gt;Node149</title>
+<title>Node140&#45;&gt;Node150</title>
 <path fill="none" stroke="#191970" d="M696.3136,-265.7222C724.8242,-257.4375 762.1229,-245.5645 794,-232 891.3198,-190.5881 999.9077,-126.6455 1047.4595,-97.6818"/>
 <polygon fill="#191970" stroke="#191970" points="695.268,-262.3809 686.6188,-268.4998 697.196,-269.1101 695.268,-262.3809"/>
 </g>
-<!-- Node139&#45;&gt;Node167 -->
+<!-- Node140&#45;&gt;Node169 -->
 <g id="edge18" class="edge">
-<title>Node139&#45;&gt;Node167</title>
+<title>Node140&#45;&gt;Node169</title>
 <path fill="none" stroke="#191970" d="M671.9175,-262.3119C684.5231,-252.8577 700.4921,-240.8809 712.9405,-231.5446"/>
 <polygon fill="#191970" stroke="#191970" points="669.7125,-259.5906 663.8125,-268.3906 673.9125,-265.1906 669.7125,-259.5906"/>
 </g>
-<!-- Node139&#45;&gt;Node26 -->
+<!-- Node140&#45;&gt;Node26 -->
 <g id="edge25" class="edge">
-<title>Node139&#45;&gt;Node26</title>
+<title>Node140&#45;&gt;Node26</title>
 <path fill="none" stroke="#191970" d="M719.7562,-271.5834C851.5923,-259.2801 1137.3853,-232.609 1256.3496,-221.5068"/>
 <polygon fill="#191970" stroke="#191970" points="719.403,-268.1011 709.7715,-272.5153 720.0535,-275.0708 719.403,-268.1011"/>
 </g>
-<!-- Node139&#45;&gt;Node174 -->
+<!-- Node140&#45;&gt;Node176 -->
 <g id="edge28" class="edge">
-<title>Node139&#45;&gt;Node174</title>
+<title>Node140&#45;&gt;Node176</title>
 <path fill="none" stroke="#191970" d="M632.4453,-261.8363C610.83,-243.9436 573.3,-215.6944 536,-201 490.2739,-182.9862 357.4539,-165.4335 278.6754,-156.1989"/>
 <polygon fill="#191970" stroke="#191970" points="630.2743,-264.584 640.1741,-268.3584 634.7888,-259.2342 630.2743,-264.584"/>
 </g>
-<!-- Node139&#45;&gt;Node181 -->
+<!-- Node140&#45;&gt;Node183 -->
 <g id="edge30" class="edge">
-<title>Node139&#45;&gt;Node181</title>
+<title>Node140&#45;&gt;Node183</title>
 <path fill="none" stroke="#191970" d="M581.9495,-268.9454C489.2003,-256.7832 326.0613,-235.3908 239.582,-224.0507"/>
 <polygon fill="#191970" stroke="#191970" points="581.7055,-272.4433 592.0757,-270.2733 582.6157,-265.5027 581.7055,-272.4433"/>
 </g>
-<!-- Node147&#45;&gt;Node148 -->
+<!-- Node148&#45;&gt;Node149 -->
 <g id="edge14" class="edge">
-<title>Node147&#45;&gt;Node148</title>
+<title>Node148&#45;&gt;Node149</title>
 <path fill="none" stroke="#191970" d="M1072,-191.0249C1072,-182.128 1072,-172.4287 1072,-164.6432"/>
 <polygon fill="#191970" stroke="#191970" points="1068.5001,-191.2966 1072,-201.2967 1075.5001,-191.2967 1068.5001,-191.2966"/>
 </g>
-<!-- Node148&#45;&gt;Node149 -->
+<!-- Node149&#45;&gt;Node150 -->
 <g id="edge15" class="edge">
-<title>Node148&#45;&gt;Node149</title>
+<title>Node149&#45;&gt;Node150</title>
 <path fill="none" stroke="#191970" d="M1072,-124.0249C1072,-115.128 1072,-105.4287 1072,-97.6432"/>
 <polygon fill="#191970" stroke="#191970" points="1068.5001,-124.2966 1072,-134.2967 1075.5001,-124.2967 1068.5001,-124.2966"/>
 </g>
-<!-- Node167&#45;&gt;Node169 -->
+<!-- Node169&#45;&gt;Node171 -->
 <g id="edge19" class="edge">
-<title>Node167&#45;&gt;Node169</title>
+<title>Node169&#45;&gt;Node171</title>
 <path fill="none" stroke="#191970" d="M764.8299,-195.7951C780.2634,-185.7558 798.4705,-173.9124 812.6056,-164.7177"/>
 <polygon fill="#191970" stroke="#191970" points="762.5893,-193.0772 756.1152,-201.4639 766.4063,-198.945 762.5893,-193.0772"/>
 </g>
-<!-- Node167&#45;&gt;Node170 -->
+<!-- Node169&#45;&gt;Node172 -->
 <g id="edge23" class="edge">
-<title>Node167&#45;&gt;Node170</title>
+<title>Node169&#45;&gt;Node172</title>
 <path fill="none" stroke="#191970" d="M735.8781,-191.348C740.7232,-149.0061 750.3629,-64.7637 754.2826,-30.5088"/>
 <polygon fill="#191970" stroke="#191970" points="732.3884,-191.0592 734.7287,-201.3923 739.343,-191.8551 732.3884,-191.0592"/>
 </g>
-<!-- Node167&#45;&gt;Node171 -->
+<!-- Node169&#45;&gt;Node173 -->
 <g id="edge24" class="edge">
-<title>Node167&#45;&gt;Node171</title>
+<title>Node169&#45;&gt;Node173</title>
 <path fill="none" stroke="#191970" d="M742.2505,-191.9549C749.5143,-174.6471 760.7714,-151.5217 775,-134 786.5077,-119.8289 802.5077,-106.9539 815.6308,-97.6217"/>
 <polygon fill="#191970" stroke="#191970" points="738.9408,-190.8037 738.4428,-201.3869 745.4318,-193.4242 738.9408,-190.8037"/>
 </g>
-<!-- Node169&#45;&gt;Node170 -->
+<!-- Node171&#45;&gt;Node172 -->
 <g id="edge20" class="edge">
-<title>Node169&#45;&gt;Node170</title>
+<title>Node171&#45;&gt;Node172</title>
 <path fill="none" stroke="#191970" d="M803.2614,-128.2715C792.8111,-119.9906 782.122,-109.6686 775,-98 762.1357,-76.9234 757.9732,-47.98 756.6316,-30.572"/>
 <polygon fill="#191970" stroke="#191970" points="801.4177,-131.2637 811.5184,-134.4618 805.6167,-125.6629 801.4177,-131.2637"/>
 </g>
-<!-- Node169&#45;&gt;Node171 -->
+<!-- Node171&#45;&gt;Node173 -->
 <g id="edge21" class="edge">
-<title>Node169&#45;&gt;Node171</title>
+<title>Node171&#45;&gt;Node173</title>
 <path fill="none" stroke="#191970" d="M837.1407,-124.0249C837.539,-115.128 837.9733,-105.4287 838.3219,-97.6432"/>
 <polygon fill="#191970" stroke="#191970" points="833.6317,-124.1501 836.6807,-134.2967 840.6246,-124.4633 833.6317,-124.1501"/>
 </g>
-<!-- Node171&#45;&gt;Node170 -->
+<!-- Node173&#45;&gt;Node172 -->
 <g id="edge22" class="edge">
-<title>Node171&#45;&gt;Node170</title>
+<title>Node173&#45;&gt;Node172</title>
 <path fill="none" stroke="#191970" d="M812.0942,-60.7808C799.9012,-50.9383 785.7821,-39.541 774.7595,-30.6432"/>
 <polygon fill="#191970" stroke="#191970" points="810.1864,-63.7389 820.166,-67.2967 814.5832,-58.292 810.1864,-63.7389"/>
 </g>
@@ -901,51 +901,51 @@
 <path fill="none" stroke="#191970" d="M1302.9417,-191.6103C1300.3739,-182.5553 1297.5507,-172.5998 1295.2943,-164.6432"/>
 <polygon fill="#191970" stroke="#191970" points="1299.5931,-192.6309 1305.6886,-201.2967 1306.3275,-190.7211 1299.5931,-192.6309"/>
 </g>
-<!-- Node174&#45;&gt;Node175 -->
+<!-- Node176&#45;&gt;Node177 -->
 <g id="edge29" class="edge">
-<title>Node174&#45;&gt;Node175</title>
+<title>Node176&#45;&gt;Node177</title>
 <path fill="none" stroke="#191970" d="M245.1605,-135.0575C269.6098,-122.0567 304.85,-103.3179 325.9808,-92.0817"/>
 <polygon fill="#191970" stroke="#191970" points="243.2388,-132.1152 236.0527,-139.9005 246.5253,-138.2958 243.2388,-132.1152"/>
 </g>
-<!-- Node181&#45;&gt;Node174 -->
+<!-- Node183&#45;&gt;Node176 -->
 <g id="edge31" class="edge">
-<title>Node181&#45;&gt;Node174</title>
+<title>Node183&#45;&gt;Node176</title>
 <path fill="none" stroke="#191970" d="M192.0814,-197.7374C198.8387,-185.1614 207.4554,-169.1246 212.8516,-159.0817"/>
 <polygon fill="#191970" stroke="#191970" points="188.808,-196.435 187.1579,-206.9005 194.9743,-199.7482 188.808,-196.435"/>
 </g>
-<!-- Node183&#45;&gt;Node139 -->
+<!-- Node185&#45;&gt;Node140 -->
 <g id="edge33" class="edge">
-<title>Node183&#45;&gt;Node139</title>
+<title>Node185&#45;&gt;Node140</title>
 <path fill="none" stroke="#191970" d="M454.5795,-397.0966C500.3073,-369.3703 597.2507,-310.5901 635.2502,-287.5496"/>
 <polygon fill="#191970" stroke="#191970" points="452.6301,-394.1854 445.8938,-402.3631 456.2594,-400.1711 452.6301,-394.1854"/>
 </g>
-<!-- Node183&#45;&gt;Node175 -->
+<!-- Node185&#45;&gt;Node177 -->
 <g id="edge37" class="edge">
-<title>Node183&#45;&gt;Node175</title>
+<title>Node185&#45;&gt;Node177</title>
 <path fill="none" stroke="#191970" d="M405.1529,-396.6355C390.7798,-386.5072 373.5107,-371.9946 363,-355 344.4591,-325.0216 344,-313.2487 344,-278 344,-278 344,-278 344,-216.5 344,-169.9722 344,-114.357 344,-92.2517"/>
 <polygon fill="#191970" stroke="#191970" points="403.3934,-399.6704 413.6417,-402.3582 407.3064,-393.8661 403.3934,-399.6704"/>
 </g>
-<!-- Node183&#45;&gt;Node184 -->
+<!-- Node185&#45;&gt;Node186 -->
 <g id="edge34" class="edge">
-<title>Node183&#45;&gt;Node184</title>
+<title>Node185&#45;&gt;Node186</title>
 <path fill="none" stroke="#191970" d="M430.2744,-392.1054C430.4673,-378.1237 430.7168,-360.0346 430.8673,-349.1228"/>
 <polygon fill="#191970" stroke="#191970" points="426.7732,-392.175 430.1349,-402.2223 433.7725,-392.2716 426.7732,-392.175"/>
 </g>
-<!-- Node184&#45;&gt;Node16 -->
+<!-- Node186&#45;&gt;Node16 -->
 <g id="edge35" class="edge">
-<title>Node184&#45;&gt;Node16</title>
+<title>Node186&#45;&gt;Node16</title>
 <path fill="none" stroke="#191970" d="M437.3129,-320.0879C446.0023,-293.3678 461.3169,-246.2756 467.8758,-226.1068"/>
 <polygon fill="#191970" stroke="#191970" points="433.903,-319.2562 434.1387,-329.8484 440.5598,-321.4211 433.903,-319.2562"/>
 </g>
-<!-- Node184&#45;&gt;Node139 -->
+<!-- Node186&#45;&gt;Node140 -->
 <g id="edge36" class="edge">
-<title>Node184&#45;&gt;Node139</title>
+<title>Node186&#45;&gt;Node140</title>
 <path fill="none" stroke="#191970" d="M475.0111,-327.1969C516.7859,-315.5189 578.7032,-298.2103 616.8664,-287.5419"/>
 <polygon fill="#191970" stroke="#191970" points="474.0635,-323.8275 465.375,-329.8906 475.9481,-330.5691 474.0635,-323.8275"/>
 </g>
-<!-- Node186&#45;&gt;Node170 -->
+<!-- Node188&#45;&gt;Node172 -->
 <g id="edge48" class="edge">
-<title>Node186&#45;&gt;Node170</title>
+<title>Node188&#45;&gt;Node172</title>
 <path fill="none" stroke="#191970" d="M1702.2049,-387.5738C1767.517,-360.3138 1860,-315.1536 1860,-278 1860,-278 1860,-278 1860,-149.5 1860,-43.1286 1041.8861,-20.5946 814.1883,-16.3829"/>
 <polygon fill="#191970" stroke="#191970" points="1700.689,-384.413 1692.7801,-391.4628 1703.3591,-390.8838 1700.689,-384.413"/>
 </g>
@@ -955,21 +955,21 @@
 <path fill="none" stroke="#191970" d="M1340.3159,-314.6276C1332.6439,-289.7948 1321.0997,-252.428 1314.6977,-231.7056"/>
 <polygon fill="#191970" stroke="#191970" points="1337.0446,-315.8965 1343.3405,-324.4178 1343.7327,-313.8302 1337.0446,-315.8965"/>
 </g>
-<!-- Node188&#45;&gt;Node13 -->
+<!-- Node190&#45;&gt;Node13 -->
 <g id="edge60" class="edge">
-<title>Node188&#45;&gt;Node13</title>
+<title>Node190&#45;&gt;Node13</title>
 <path fill="none" stroke="#191970" d="M936.9225,-531.9863C934.3194,-519.5286 931.0591,-503.9258 929.0021,-494.0817"/>
 <polygon fill="#191970" stroke="#191970" points="933.5227,-532.8279 938.9941,-541.9005 940.3747,-531.396 933.5227,-532.8279"/>
 </g>
-<!-- Node188&#45;&gt;Node128 -->
+<!-- Node190&#45;&gt;Node129 -->
 <g id="edge59" class="edge">
-<title>Node188&#45;&gt;Node128</title>
+<title>Node190&#45;&gt;Node129</title>
 <path fill="none" stroke="#191970" d="M1003.7715,-539.7662C1040.7578,-531.4872 1087.8264,-518.5352 1127,-500 1148.6233,-489.7688 1148.9945,-478.3812 1171,-469 1248.0546,-436.1507 1274.0882,-450.5199 1356,-433 1364.6294,-431.1543 1373.7051,-429.1323 1382.6369,-427.0951"/>
 <polygon fill="#191970" stroke="#191970" points="1002.6471,-536.429 993.6217,-541.9782 1004.1377,-543.2685 1002.6471,-536.429"/>
 </g>
-<!-- Node189 -->
+<!-- Node191 -->
 <g id="node30" class="node">
-<title>Node189</title>
+<title>Node191</title>
 <g id="a_node30"><a xlink:href="env__func_8h.html" target="_top" xlink:title="Serializable global function used in IR. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="1013.5,-469.5 1013.5,-499.5 1118.5,-499.5 1118.5,-469.5 1013.5,-469.5"/>
 <text text-anchor="start" x="1021.5" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/env</text>
@@ -977,249 +977,249 @@
 </a>
 </g>
 </g>
-<!-- Node188&#45;&gt;Node189 -->
+<!-- Node190&#45;&gt;Node191 -->
 <g id="edge58" class="edge">
-<title>Node188&#45;&gt;Node189</title>
+<title>Node190&#45;&gt;Node191</title>
 <path fill="none" stroke="#191970" d="M967.8024,-537.1339C988.4787,-526.0514 1016.9968,-510.7657 1037.9743,-499.5218"/>
 <polygon fill="#191970" stroke="#191970" points="966.0697,-534.0915 958.9095,-541.9005 969.3767,-540.2611 966.0697,-534.0915"/>
 </g>
-<!-- Node199&#45;&gt;Node200 -->
+<!-- Node201&#45;&gt;Node202 -->
 <g id="edge67" class="edge">
-<title>Node199&#45;&gt;Node200</title>
+<title>Node201&#45;&gt;Node202</title>
 <path fill="none" stroke="#191970" d="M2336.2275,-661.7735C2342.2334,-652.4154 2348.9551,-641.9421 2354.2812,-633.6432"/>
 <polygon fill="#191970" stroke="#191970" points="2333.2131,-659.9903 2330.7574,-670.2967 2339.1042,-663.7712 2333.2131,-659.9903"/>
 </g>
-<!-- Node202&#45;&gt;Node199 -->
+<!-- Node204&#45;&gt;Node201 -->
 <g id="edge70" class="edge">
-<title>Node202&#45;&gt;Node199</title>
+<title>Node204&#45;&gt;Node201</title>
 <path fill="none" stroke="#191970" d="M2321,-727.0249C2321,-718.128 2321,-708.4287 2321,-700.6432"/>
 <polygon fill="#191970" stroke="#191970" points="2317.5001,-727.2966 2321,-737.2967 2324.5001,-727.2967 2317.5001,-727.2966"/>
 </g>
-<!-- Node203&#45;&gt;Node167 -->
+<!-- Node205&#45;&gt;Node169 -->
 <g id="edge92" class="edge">
-<title>Node203&#45;&gt;Node167</title>
+<title>Node205&#45;&gt;Node169</title>
 <path fill="none" stroke="#191970" d="M213.251,-733.0498C170.5408,-711.7967 114,-673.3105 114,-618.5 114,-618.5 114,-618.5 114,-484.5 114,-361.3932 531.5252,-259.7287 680.7579,-227.3233"/>
 <polygon fill="#191970" stroke="#191970" points="211.8723,-736.2705 222.4014,-737.4495 214.9057,-729.9618 211.8723,-736.2705"/>
 </g>
-<!-- Node203&#45;&gt;Node174 -->
+<!-- Node205&#45;&gt;Node176 -->
 <g id="edge93" class="edge">
-<title>Node203&#45;&gt;Node174</title>
+<title>Node205&#45;&gt;Node176</title>
 <path fill="none" stroke="#191970" d="M191.6761,-739.4345C147.3062,-729.8094 94.5753,-715.8566 77,-701 46.0263,-674.8176 38,-659.0572 38,-618.5 38,-618.5 38,-618.5 38,-278 38,-209.847 124.5345,-174.4271 177.6045,-159.113"/>
 <polygon fill="#191970" stroke="#191970" points="191.0714,-742.8841 201.581,-741.5426 192.5287,-736.0375 191.0714,-742.8841"/>
 </g>
-<!-- Node203&#45;&gt;Node181 -->
+<!-- Node205&#45;&gt;Node183 -->
 <g id="edge94" class="edge">
-<title>Node203&#45;&gt;Node181</title>
+<title>Node205&#45;&gt;Node183</title>
 <path fill="none" stroke="#191970" d="M191.785,-737.3153C154.4965,-727.7394 113.2569,-714.6375 100,-701 73.3828,-673.6185 76,-656.6867 76,-618.5 76,-618.5 76,-618.5 76,-339.5 76,-285.9395 134.3893,-244.1292 164.4963,-226.1309"/>
 <polygon fill="#191970" stroke="#191970" points="191.1127,-740.7551 201.6641,-739.7959 192.8175,-733.9658 191.1127,-740.7551"/>
 </g>
-<!-- Node203&#45;&gt;Node184 -->
+<!-- Node205&#45;&gt;Node186 -->
 <g id="edge72" class="edge">
-<title>Node203&#45;&gt;Node184</title>
+<title>Node205&#45;&gt;Node186</title>
 <path fill="none" stroke="#191970" d="M239.1931,-729.4738C218.5586,-704.1788 190,-661.4177 190,-618.5 190,-618.5 190,-618.5 190,-484.5 190,-434.6385 211.7901,-420.4845 252,-391 286.944,-365.3768 334.3496,-352.4676 371.5969,-345.9828"/>
 <polygon fill="#191970" stroke="#191970" points="236.7149,-731.9645 245.8345,-737.3575 242.0685,-727.4546 236.7149,-731.9645"/>
 </g>
-<!-- Node203&#45;&gt;Node128 -->
+<!-- Node205&#45;&gt;Node129 -->
 <g id="edge73" class="edge">
-<title>Node203&#45;&gt;Node128</title>
+<title>Node205&#45;&gt;Node129</title>
 <path fill="none" stroke="#191970" d="M297.2547,-732.8602C334.2835,-714.116 393.171,-686.3761 447,-670 572.6569,-631.7722 902.4517,-602.1645 1029,-567 1145.9823,-534.4936 1170.0557,-510.9312 1284,-469 1323.8725,-454.327 1369.5512,-438.3278 1402.2177,-427.0253"/>
 <polygon fill="#191970" stroke="#191970" points="295.6416,-729.7539 288.3279,-737.4195 298.8256,-735.9879 295.6416,-729.7539"/>
 </g>
-<!-- Node203&#45;&gt;Node188 -->
+<!-- Node205&#45;&gt;Node190 -->
 <g id="edge74" class="edge">
-<title>Node203&#45;&gt;Node188</title>
+<title>Node205&#45;&gt;Node190</title>
 <path fill="none" stroke="#191970" d="M282.76,-730.6287C303.7496,-711.7011 336.6241,-685.0677 370,-670 534.9441,-595.5351 747.9329,-567.2855 861.4556,-557.0171"/>
 <polygon fill="#191970" stroke="#191970" points="280.3156,-728.1216 275.3212,-737.4654 285.0524,-733.2756 280.3156,-728.1216"/>
 </g>
-<!-- Node203&#45;&gt;Node206 -->
+<!-- Node205&#45;&gt;Node208 -->
 <g id="edge75" class="edge">
-<title>Node203&#45;&gt;Node206</title>
+<title>Node205&#45;&gt;Node208</title>
 <path fill="none" stroke="#191970" d="M328.0416,-746.8857C484.4593,-733.9793 867.4065,-702.3815 1013.7964,-690.3025"/>
 <polygon fill="#191970" stroke="#191970" points="327.7495,-743.3978 318.0712,-747.7084 328.3251,-750.3741 327.7495,-743.3978"/>
 </g>
-<!-- Node203&#45;&gt;Node211 -->
+<!-- Node205&#45;&gt;Node213 -->
 <g id="edge91" class="edge">
-<title>Node203&#45;&gt;Node211</title>
+<title>Node205&#45;&gt;Node213</title>
 <path fill="none" stroke="#191970" d="M328.2017,-750.8166C506.594,-746.0438 983.9158,-730.7991 1139,-701 1216.9804,-686.0162 1304.2375,-652.9658 1351.648,-633.5511"/>
 <polygon fill="#191970" stroke="#191970" points="328.0588,-747.3191 318.155,-751.0828 328.2442,-754.3166 328.0588,-747.3191"/>
 </g>
-<!-- Node206&#45;&gt;Node8 -->
+<!-- Node208&#45;&gt;Node8 -->
 <g id="edge76" class="edge">
-<title>Node206&#45;&gt;Node8</title>
+<title>Node208&#45;&gt;Node8</title>
 <path fill="none" stroke="#191970" d="M1069.8537,-660.4088C1069.5969,-642.9482 1071.8426,-619.8663 1083,-603 1132.5819,-528.0488 1179.7378,-545.8978 1257,-500 1280.1325,-486.2581 1283.7173,-478.2053 1309,-469 1402.9238,-434.803 1458.9312,-502.2619 1531,-433 1561.5743,-403.6165 1550,-381.9049 1550,-339.5 1550,-339.5 1550,-339.5 1550,-278 1550,-236.8719 1545.8672,-188.4822 1543.5425,-164.5162"/>
 <polygon fill="#191970" stroke="#191970" points="1066.3588,-660.6056 1070.2796,-670.4482 1073.3525,-660.3088 1066.3588,-660.6056"/>
 </g>
-<!-- Node206&#45;&gt;Node188 -->
+<!-- Node208&#45;&gt;Node190 -->
 <g id="edge77" class="edge">
-<title>Node206&#45;&gt;Node188</title>
+<title>Node208&#45;&gt;Node190</title>
 <path fill="none" stroke="#191970" d="M1051.3655,-662.6848C1036.0555,-645.9318 1014.538,-622.7497 995,-603 980.2401,-588.0801 962.5363,-571.42 951.5251,-561.1968"/>
 <polygon fill="#191970" stroke="#191970" points="1049.0253,-665.3132 1058.3467,-670.3493 1054.2003,-660.5995 1049.0253,-665.3132"/>
 </g>
-<!-- Node206&#45;&gt;Node208 -->
+<!-- Node208&#45;&gt;Node210 -->
 <g id="edge78" class="edge">
-<title>Node206&#45;&gt;Node208</title>
+<title>Node208&#45;&gt;Node210</title>
 <path fill="none" stroke="#191970" d="M1077.0352,-660.4614C1082.1311,-641.7576 1091.9301,-617.0863 1110,-603 1143.041,-577.2431 1256.4163,-562.8143 1326.8608,-556.1421"/>
 <polygon fill="#191970" stroke="#191970" points="1073.5814,-659.851 1074.6082,-670.396 1080.3815,-661.5123 1073.5814,-659.851"/>
 </g>
-<!-- Node206&#45;&gt;Node211 -->
+<!-- Node208&#45;&gt;Node213 -->
 <g id="edge80" class="edge">
-<title>Node206&#45;&gt;Node211</title>
+<title>Node208&#45;&gt;Node213</title>
 <path fill="none" stroke="#191970" d="M1140.0319,-671.0297C1196.3631,-659.0482 1275.541,-642.2071 1328.9299,-630.8514"/>
 <polygon fill="#191970" stroke="#191970" points="1139.196,-667.6291 1130.1429,-673.1331 1140.6523,-674.476 1139.196,-667.6291"/>
 </g>
-<!-- Node206&#45;&gt;Node217 -->
+<!-- Node208&#45;&gt;Node219 -->
 <g id="edge88" class="edge">
-<title>Node206&#45;&gt;Node217</title>
+<title>Node208&#45;&gt;Node219</title>
 <path fill="none" stroke="#191970" d="M1098.4046,-664.6286C1113.3829,-654.5979 1132.0595,-642.7976 1147.5146,-633.6432"/>
 <polygon fill="#191970" stroke="#191970" points="1096.3684,-661.7807 1090.0653,-670.2967 1100.3033,-667.57 1096.3684,-661.7807"/>
 </g>
-<!-- Node206&#45;&gt;Node218 -->
+<!-- Node208&#45;&gt;Node220 -->
 <g id="edge90" class="edge">
-<title>Node206&#45;&gt;Node218</title>
+<title>Node208&#45;&gt;Node220</title>
 <path fill="none" stroke="#191970" d="M1028.2171,-666.4516C1004.2841,-656.0392 975.0545,-643.3224 952.7113,-633.6017"/>
 <polygon fill="#191970" stroke="#191970" points="1026.8733,-669.6838 1037.4394,-670.4639 1029.666,-663.265 1026.8733,-669.6838"/>
 </g>
-<!-- Node208&#45;&gt;Node209 -->
+<!-- Node210&#45;&gt;Node211 -->
 <g id="edge79" class="edge">
-<title>Node208&#45;&gt;Node209</title>
+<title>Node210&#45;&gt;Node211</title>
 <path fill="none" stroke="#191970" d="M1381.6173,-526.3179C1380.4115,-517.3414 1379.0915,-507.5143 1378.0342,-499.6432"/>
 <polygon fill="#191970" stroke="#191970" points="1378.1575,-526.8517 1382.9578,-536.2967 1385.0952,-525.9197 1378.1575,-526.8517"/>
 </g>
-<!-- Node211&#45;&gt;Node6 -->
+<!-- Node213&#45;&gt;Node6 -->
 <g id="edge81" class="edge">
-<title>Node211&#45;&gt;Node6</title>
+<title>Node213&#45;&gt;Node6</title>
 <path fill="none" stroke="#191970" d="M1421.1387,-598.301C1459.3686,-575.7171 1517.3898,-541.5617 1528,-536 1609.2301,-493.4206 1707.7367,-449.4148 1759.0658,-427.039"/>
 <polygon fill="#191970" stroke="#191970" points="1419.1887,-595.3879 1412.3604,-603.4888 1422.7501,-601.4142 1419.1887,-595.3879"/>
 </g>
-<!-- Node211&#45;&gt;Node186 -->
+<!-- Node213&#45;&gt;Node188 -->
 <g id="edge87" class="edge">
-<title>Node211&#45;&gt;Node186</title>
+<title>Node213&#45;&gt;Node188</title>
 <path fill="none" stroke="#191970" d="M1419.794,-597.4362C1431.0781,-589 1443.0997,-578.5364 1452,-567 1481.0539,-529.3411 1460.7579,-502.0121 1495,-469 1507.6302,-456.8234 1544.8035,-442.2492 1578.2154,-430.886"/>
 <polygon fill="#191970" stroke="#191970" points="1417.4587,-594.8039 1411.3727,-603.4763 1421.5385,-600.4921 1417.4587,-594.8039"/>
 </g>
-<!-- Node211&#45;&gt;Node188 -->
+<!-- Node213&#45;&gt;Node190 -->
 <g id="edge82" class="edge">
-<title>Node211&#45;&gt;Node188</title>
+<title>Node213&#45;&gt;Node190</title>
 <path fill="none" stroke="#191970" d="M1318.5574,-608.2183C1232.9448,-595.3572 1087.5233,-573.5114 1004.3608,-561.0183"/>
 <polygon fill="#191970" stroke="#191970" points="1318.2843,-611.7164 1328.6933,-609.7409 1319.3242,-604.7941 1318.2843,-611.7164"/>
 </g>
-<!-- Node211&#45;&gt;Node208 -->
+<!-- Node213&#45;&gt;Node210 -->
 <g id="edge83" class="edge">
-<title>Node211&#45;&gt;Node208</title>
+<title>Node213&#45;&gt;Node210</title>
 <path fill="none" stroke="#191970" d="M1386.2395,-593.0249C1385.974,-584.128 1385.6844,-574.4287 1385.452,-566.6432"/>
 <polygon fill="#191970" stroke="#191970" points="1382.7493,-593.4056 1386.5462,-603.2967 1389.7461,-593.1967 1382.7493,-593.4056"/>
 </g>
-<!-- Node211&#45;&gt;Node209 -->
+<!-- Node213&#45;&gt;Node211 -->
 <g id="edge86" class="edge">
-<title>Node211&#45;&gt;Node209</title>
+<title>Node213&#45;&gt;Node211</title>
 <path fill="none" stroke="#191970" d="M1346.6552,-597.8289C1335.2818,-589.8471 1324.3005,-579.5645 1318,-567 1311.8241,-554.6839 1312.3525,-548.5671 1318,-536 1324.7893,-520.8922 1338.4438,-508.4132 1350.7992,-499.5246"/>
 <polygon fill="#191970" stroke="#191970" points="1345.0656,-600.97 1355.3579,-603.4839 1348.8797,-595.1004 1345.0656,-600.97"/>
 </g>
-<!-- Node211&#45;&gt;Node212 -->
+<!-- Node213&#45;&gt;Node214 -->
 <g id="edge84" class="edge">
-<title>Node211&#45;&gt;Node212</title>
+<title>Node213&#45;&gt;Node214</title>
 <path fill="none" stroke="#191970" d="M1437.4933,-600.2759C1469.2851,-589.7165 1510.1771,-576.5805 1542.3686,-566.6017"/>
 <polygon fill="#191970" stroke="#191970" points="1436.3168,-596.9787 1427.9387,-603.4639 1438.5324,-603.6188 1436.3168,-596.9787"/>
 </g>
-<!-- Node212&#45;&gt;Node211 -->
+<!-- Node214&#45;&gt;Node213 -->
 <g id="edge85" class="edge">
-<title>Node212&#45;&gt;Node211</title>
+<title>Node214&#45;&gt;Node213</title>
 <path fill="none" stroke="#191970" d="M1544.2975,-569.7936C1512.4658,-580.3642 1471.573,-593.499 1439.4198,-603.4639"/>
 <polygon fill="#191970" stroke="#191970" points="1545.4872,-573.0864 1553.8657,-566.6017 1543.272,-566.4461 1545.4872,-573.0864"/>
 </g>
-<!-- Node217&#45;&gt;Node206 -->
+<!-- Node219&#45;&gt;Node208 -->
 <g id="edge89" class="edge">
-<title>Node217&#45;&gt;Node206</title>
+<title>Node219&#45;&gt;Node208</title>
 <path fill="none" stroke="#191970" d="M1150.5469,-639.4038C1135.5183,-649.4667 1116.778,-661.3044 1101.3046,-670.4639"/>
 <polygon fill="#191970" stroke="#191970" points="1152.6103,-642.2334 1158.9139,-633.7177 1148.6757,-636.4438 1152.6103,-642.2334"/>
 </g>
-<!-- Node220&#45;&gt;Node166 -->
+<!-- Node222&#45;&gt;Node168 -->
 <g id="edge98" class="edge">
-<title>Node220&#45;&gt;Node166</title>
+<title>Node222&#45;&gt;Node168</title>
 <path fill="none" stroke="#191970" d="M1725.3788,-728.4837C1720.1132,-719.1996 1714.247,-708.8565 1709.5887,-700.6432"/>
 <polygon fill="#191970" stroke="#191970" points="1722.3993,-730.325 1730.3772,-737.2967 1728.4882,-726.8716 1722.3993,-730.325"/>
 </g>
-<!-- Node222&#45;&gt;Node13 -->
+<!-- Node224&#45;&gt;Node13 -->
 <g id="edge107" class="edge">
-<title>Node222&#45;&gt;Node13</title>
+<title>Node224&#45;&gt;Node13</title>
 <path fill="none" stroke="#191970" d="M886.061,-731.3073C875.3325,-722.9128 864.058,-712.5094 856,-701 830.2814,-664.2656 825.5186,-647.3718 832,-603 836.4916,-572.2501 833.7022,-561.1181 852,-536 865.8573,-516.9776 888.9909,-502.6523 905.9082,-494.0004"/>
 <polygon fill="#191970" stroke="#191970" points="883.9928,-734.1311 894.0963,-737.32 888.1867,-728.5265 883.9928,-734.1311"/>
 </g>
-<!-- Node222&#45;&gt;Node16 -->
+<!-- Node224&#45;&gt;Node16 -->
 <g id="edge102" class="edge">
-<title>Node222&#45;&gt;Node16</title>
+<title>Node224&#45;&gt;Node16</title>
 <path fill="none" stroke="#191970" d="M849.8469,-744.989C806.1884,-738.0823 749.4117,-725.0892 704,-701 685.935,-691.4172 687.4797,-680.6128 670,-670 630.3368,-645.9185 614.511,-652.5983 572,-634 512.0875,-607.7887 493.2785,-606.2874 441,-567 408.3241,-542.444 400.0178,-534.4374 378,-500 335.7113,-433.8575 326.5756,-393.5444 363,-324 385.9532,-280.1759 433.0003,-243.0157 456.6942,-226.1678"/>
 <polygon fill="#191970" stroke="#191970" points="849.3878,-748.4593 859.7988,-746.4946 850.4349,-741.5381 849.3878,-748.4593"/>
 </g>
-<!-- Node222&#45;&gt;Node139 -->
+<!-- Node224&#45;&gt;Node140 -->
 <g id="edge103" class="edge">
-<title>Node222&#45;&gt;Node139</title>
+<title>Node224&#45;&gt;Node140</title>
 <path fill="none" stroke="#191970" d="M885.1955,-731.6442C818.1751,-686.7868 667.2925,-573.7056 613,-433 592.5531,-380.0094 629.2207,-312.4927 644.642,-287.6946"/>
 <polygon fill="#191970" stroke="#191970" points="883.428,-734.6715 893.6992,-737.27 887.2903,-728.8334 883.428,-734.6715"/>
 </g>
-<!-- Node222&#45;&gt;Node183 -->
+<!-- Node224&#45;&gt;Node185 -->
 <g id="edge104" class="edge">
-<title>Node222&#45;&gt;Node183</title>
+<title>Node224&#45;&gt;Node185</title>
 <path fill="none" stroke="#191970" d="M873.4011,-733.2748C806.2114,-703.1899 676.6171,-640.733 580,-567 518.3783,-519.9736 458.3369,-447.655 437.6098,-421.6934"/>
 <polygon fill="#191970" stroke="#191970" points="872.2907,-736.6114 882.8502,-737.4761 875.1347,-730.2151 872.2907,-736.6114"/>
 </g>
-<!-- Node222&#45;&gt;Node184 -->
+<!-- Node224&#45;&gt;Node186 -->
 <g id="edge105" class="edge">
-<title>Node222&#45;&gt;Node184</title>
+<title>Node224&#45;&gt;Node186</title>
 <path fill="none" stroke="#191970" d="M850.1191,-739.3401C816.3787,-731.1564 775.7189,-718.7469 742,-701 722.7124,-690.8486 722.3356,-681.7842 704,-670 673.568,-650.4415 661.8583,-652.8788 631,-634 503.3869,-555.9274 428.142,-566.6877 361,-433 352.6223,-416.3189 352.5753,-407.6574 361,-391 370.8736,-371.478 392.0194,-357.5488 408.5005,-349.1397"/>
 <polygon fill="#191970" stroke="#191970" points="849.3594,-742.7569 859.8953,-741.6411 850.9632,-735.9431 849.3594,-742.7569"/>
 </g>
-<!-- Node222&#45;&gt;Node128 -->
+<!-- Node224&#45;&gt;Node129 -->
 <g id="edge106" class="edge">
-<title>Node222&#45;&gt;Node128</title>
+<title>Node224&#45;&gt;Node129</title>
 <path fill="none" stroke="#191970" d="M986.3249,-742.7372C1030.9694,-734.8812 1089.8838,-721.6765 1139,-701 1164.5106,-690.2608 1226.4015,-655.3639 1244,-634 1294.1133,-573.1641 1253.799,-525.26 1309,-469 1328.5756,-449.0488 1356.1672,-435.7705 1381.4636,-427.0925"/>
 <polygon fill="#191970" stroke="#191970" points="985.7161,-739.2905 976.4507,-744.4288 986.8982,-746.1899 985.7161,-739.2905"/>
 </g>
-<!-- Node222&#45;&gt;Node188 -->
+<!-- Node224&#45;&gt;Node190 -->
 <g id="edge108" class="edge">
-<title>Node222&#45;&gt;Node188</title>
+<title>Node224&#45;&gt;Node190</title>
 <path fill="none" stroke="#191970" d="M900.3496,-729.2439C879.2718,-700.9099 846.0852,-654.3127 840,-634 836.0461,-620.8017 832.8029,-614.7486 840,-603 853.2119,-581.4329 878.608,-568.5413 900.6276,-561.0499"/>
 <polygon fill="#191970" stroke="#191970" points="897.6629,-731.4953 906.4588,-737.4015 903.2659,-727.2992 897.6629,-731.4953"/>
 </g>
-<!-- Node222&#45;&gt;Node194 -->
+<!-- Node224&#45;&gt;Node196 -->
 <g id="edge109" class="edge">
-<title>Node222&#45;&gt;Node194</title>
+<title>Node224&#45;&gt;Node196</title>
 <path fill="none" stroke="#191970" d="M849.802,-739.1785C783.0102,-726.1317 682.8994,-706.5766 623.7409,-695.0208"/>
 <polygon fill="#191970" stroke="#191970" points="849.3292,-742.6522 859.8147,-741.1344 850.6712,-735.7821 849.3292,-742.6522"/>
 </g>
-<!-- Node222&#45;&gt;Node206 -->
+<!-- Node224&#45;&gt;Node208 -->
 <g id="edge112" class="edge">
-<title>Node222&#45;&gt;Node206</title>
+<title>Node224&#45;&gt;Node208</title>
 <path fill="none" stroke="#191970" d="M961.7829,-733.4516C985.7159,-723.0392 1014.9455,-710.3224 1037.2887,-700.6017"/>
 <polygon fill="#191970" stroke="#191970" points="960.334,-730.265 952.5606,-737.4639 963.1267,-736.6838 960.334,-730.265"/>
 </g>
-<!-- Node222&#45;&gt;Node209 -->
+<!-- Node224&#45;&gt;Node211 -->
 <g id="edge114" class="edge">
-<title>Node222&#45;&gt;Node209</title>
+<title>Node224&#45;&gt;Node211</title>
 <path fill="none" stroke="#191970" d="M986.2521,-744.3566C1057.5278,-735.0998 1164.003,-718.9299 1201,-701 1241.5216,-681.362 1252.0773,-671.5035 1277,-634 1302.437,-595.7226 1283.4277,-573.4981 1310,-536 1320.504,-521.1771 1336.4858,-508.5511 1350.0917,-499.5107"/>
 <polygon fill="#191970" stroke="#191970" points="985.4771,-740.9273 976.0039,-745.6716 986.3681,-747.8704 985.4771,-740.9273"/>
 </g>
-<!-- Node222&#45;&gt;Node211 -->
+<!-- Node224&#45;&gt;Node213 -->
 <g id="edge113" class="edge">
-<title>Node222&#45;&gt;Node211</title>
+<title>Node224&#45;&gt;Node213</title>
 <path fill="none" stroke="#191970" d="M986.2213,-749.4542C1084.3351,-744.1036 1259.582,-730.6397 1315,-701 1344.1683,-685.3996 1366.9482,-652.8931 1378.5488,-633.6692"/>
 <polygon fill="#191970" stroke="#191970" points="985.9034,-745.966 976.1033,-749.9921 986.2751,-752.9562 985.9034,-745.966"/>
 </g>
-<!-- Node222&#45;&gt;Node212 -->
+<!-- Node224&#45;&gt;Node214 -->
 <g id="edge111" class="edge">
-<title>Node222&#45;&gt;Node212</title>
+<title>Node224&#45;&gt;Node214</title>
 <path fill="none" stroke="#191970" d="M986.4176,-747.8405C1090.3492,-740.1666 1282.8158,-723.454 1348,-701 1441.3623,-668.8395 1537.3022,-597.6453 1576.5655,-566.5375"/>
 <polygon fill="#191970" stroke="#191970" points="986.0558,-744.3575 976.3374,-748.5769 986.5659,-751.3389 986.0558,-744.3575"/>
 </g>
-<!-- Node222&#45;&gt;Node218 -->
+<!-- Node224&#45;&gt;Node220 -->
 <g id="edge115" class="edge">
-<title>Node222&#45;&gt;Node218</title>
+<title>Node224&#45;&gt;Node220</title>
 <path fill="none" stroke="#191970" d="M918,-727.3415C918,-699.8131 918,-656.5714 918,-633.7614"/>
 <polygon fill="#191970" stroke="#191970" points="914.5001,-727.3889 918,-737.389 921.5001,-727.389 914.5001,-727.3889"/>
 </g>
-<!-- Node222&#45;&gt;Node166 -->
+<!-- Node224&#45;&gt;Node168 -->
 <g id="edge110" class="edge">
-<title>Node222&#45;&gt;Node166</title>
+<title>Node224&#45;&gt;Node168</title>
 <path fill="none" stroke="#191970" d="M986.3372,-746.6525C1138.1482,-733.6623 1501.2168,-702.5951 1642.9363,-690.4684"/>
 <polygon fill="#191970" stroke="#191970" points="985.8777,-743.1789 976.2126,-747.5188 986.4746,-750.1534 985.8777,-743.1789"/>
 </g>
diff --git a/docs/reference/api/doxygen/classes.html b/docs/reference/api/doxygen/classes.html
index 8267ed7a2..fd2523a4e 100644
--- a/docs/reference/api/doxygen/classes.html
+++ b/docs/reference/api/doxygen/classes.html
@@ -65,253 +65,254 @@ $(function() {
 <div class="qindex"><a class="qindex" href="#letter_a">a</a>&#160;|&#160;<a class="qindex" href="#letter_b">b</a>&#160;|&#160;<a class="qindex" href="#letter_c">c</a>&#160;|&#160;<a class="qindex" href="#letter_d">d</a>&#160;|&#160;<a class="qindex" href="#letter_e">e</a>&#160;|&#160;<a class="qindex" href="#letter_f">f</a>&#160;|&#160;<a class="qindex" href="#letter_g">g</a>&#160;|&#160;<a class="qindex" href="#letter_h">h</a>&#160;|&#160;<a class="qindex" href="#letter_i">i</a>&#160;|& [...]
 <table class="classindex">
 <tr><td rowspan="2" valign="bottom"><a name="letter_a"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;a&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CostModel.html">CostModel</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1is__specialized.html">is_specialized</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1PoolInfoProperties.html">PoolInfoProperties< [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1CostModelNode.html">CostModelNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1is__specialized_3_01Container_3_01Args_8_8_8_01_4_00_01Container_01_4.html">is_specialized&lt; Container&lt; Args... &gt;, Container &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AccessAnalyzer.html">AccessAnalyzer</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CostModelNode.html">CostModelNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1It [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AccessAnalyzerNode.html">AccessAnalyzerNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1CountNode.html">CountNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_ [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool1DAttrs.html">AdaptivePool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CropAndResizeAttrs.html">CropAndResizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Map_1_1iterator.html">Map::iterator</a [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CostModel.html">CostModel</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1is__specialized_3_01Container_3_01Args_8_8_8_01_4_00_01Container_01_4.html">is_specialized&lt; Container&lt; Args... &gt;, Container &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1CostModelNode.html">CostModelNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1IterAdapter.html">IterAdapter</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1PostprocNode.html">Postproc [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AccessAnalyzer.html">AccessAnalyzer</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CostModelNode.html">CostModelNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Ma [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AccessAnalyzerNode.html">AccessAnalyzerNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1CountNode.html">CountNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_ [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool1DAttrs.html">AdaptivePool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CropAndResizeAttrs.html">CropAndResizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1Iterator.html">Iterator</a> (< [...]
 <tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool2DAttrs.html">AdaptivePool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_d"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;d&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1Iterator.html">Iterator</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1PragmaStepNode.html">PragmaStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1StftAttrs.ht [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool3DAttrs.html">AdaptivePool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1support_1_1Span_1_1iterator__base.html">Span::iterator_base</a> (<a class="el" href="namespacetvm_1_1support.html">tvm::support</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Prefetch.html">Prefetch</a> (<a  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Add.html">Add</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1Database.html">Database</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1IteratorNode.html">IteratorNode</a> (<a class="el" href="namespa [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AddNode.html">AddNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1DatabaseNode.html">DatabaseNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1auto__scheduler_1_1AttachMapNode_1_1IterKeyHash.html">AttachMapNod [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ADT.html">ADT</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DataProducer.html">DataProducer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapExpr.html">IterMapExpr</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ADTObj.html">ADTObj</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DataProducerNode.html">DataProducerNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapExprNode.html">IterMapExprNode</a> (<a class="el" href="namespace [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AffineGridAttrs.html">AffineGridAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1DataType.html">DataType</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapResult.html">IterMapResult</a> (<a class="el" href="name [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AffineType.html">AffineType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DataTypePattern.html">DataTypePattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapResultNode.html">IterMapResultNode</a> (<a class="el" href="namespacetvm_1_1arith.ht [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AffineTypeNode.html">AffineTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DataTypePatternNode.html">DataTypePatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMark.html">IterMark</a> (<a class="el" href="namespacetvm_1_1arith.html [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AllClassNonMaximumSuppressionAttrs.html">AllClassNonMaximumSuppressionAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DebugAttrs.html">DebugAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMarkNode.html">IterMar [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Allocate.html">Allocate</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DeclBuffer.html">DeclBuffer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterSplitExpr.html">IterSplitExpr</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm:: [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AllocateConst.html">AllocateConst</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DeclBufferNode.html">DeclBufferNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterSplitExprNode.html">IterSplitExprNode</a> (<a class="el" href="namespa [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AllocateConstNode.html">AllocateConstNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DeformableConv2DAttrs.html">DeformableConv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterSumExpr.html">IterSumExpr</a> (<a class=" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1usmp_1_1AllocatedPoolInfo.html">AllocatedPoolInfo</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DenseAttrs.html">DenseAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterSumExprNode.html">IterSumExprNode</a> (<a [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1tir_1_1usmp_1_1AllocatedPoolInfoNode.html">AllocatedPoolInfoNode</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1DenseMapNode.html">DenseMapNode</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IterVar.html">IterVar</a> (<a [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AllocateNode.html">AllocateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DensePackAttrs.html">DensePackAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarAttr.html">IterVarAttr</a> (<a class="el" href="namespacetvm_1_1t [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Allocator.html">Allocator</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Dependency.html">Dependency</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarAttrNode.html">IterVarAttrNode</a> (<a class="el" href="na [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AllocStorageAttrs.html">AllocStorageAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DependencyNode.html">DependencyNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IterVarNode.html">IterVarNode</a> (<a class="el" href="namesp [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AllocTensorAttrs.html">AllocTensorAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1qnn_1_1DequantizeAttrs.html">DequantizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay_1_1qnn.html">tvm::relay::qnn</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarRelation.html">IterVarRelatio [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AltPattern.html">AltPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1DeviceAPI.html">DeviceAPI</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarRelationNode.html">IterVarRelationNode</a> (<a class="el" href="name [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AltPatternNode.html">AltPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DeviceCopyAttrs.html">DeviceCopyAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_l"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><d [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ProfilerNode.html">ProfilerNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1StoreNode.html">StoreNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1Analyzer.html">Analyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1DeviceWrapper.html">DeviceWrapper</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ProgramBuilder.html"> [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1And.html">And</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1profiling_1_1DeviceWrapperNode.html">DeviceWrapperNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1L2NormalizeAttrs.html">L2NormalizeAtt [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AndNode.html">AndNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPattern.html">DFPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDoc.html">LambdaDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1p [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AnnotationStep.html">AnnotationStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternCallback.html">DFPatternCallback</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDocNode [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AnnotationStepNode.html">AnnotationStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternCallbackNode.html">DFPatternCallbackNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LayerNor [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Any.html">Any</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor.html">DFPatternFunctor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Layout.html">Layout</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&# [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AnyNode.html">AnyNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor_3_01R_07const_01DFPattern_01_6n_00_01Args_8_8_8_08_4.html">DFPatternFunctor&lt; R(const DFPattern &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="c [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ArangeAttrs.html">ArangeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternNode.html">DFPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LayoutNode.html">LayoutNode</a> (<a class="el" href="namespacetvm_1_1 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ArgInfo.html">ArgInfo</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternVisitor.html">DFPatternVisitor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LayoutTransformAttrs.html">LayoutTransformA [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ArgInfoNode.html">ArgInfoNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Diagnostic.html">Diagnostic</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LE.html">LE</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a> [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ArgReduceAttrs.html">ArgReduceAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticBuilder.html">DiagnosticBuilder</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LeakyReluAttrs.html">LeakyReluAttrs</a> (<a class="el" href="namespacetvm_1_1 [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1support_1_1Span_1_1iterator__base.html">Span::iterator_base</a> (<a class="el" href="namespacetvm_1_1support.html">tvm::support</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1PrefetchNode.html">PrefetchNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Stmt.html">Stmt</a> (<a class="el" href="namespacetv [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AdaptivePool3DAttrs.html">AdaptivePool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1IteratorNode.html">IteratorNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1PreloadMeasure [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Add.html">Add</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1Database.html">Database</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1auto__scheduler_1_1AttachMapNode_1_1IterKeyHash.html">AttachMapNode::IterKeyHash</ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AddNode.html">AddNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1DatabaseNode.html">DatabaseNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapExpr.html">IterMapExpr</a> (<a class="el" href="nam [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ADT.html">ADT</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DataProducer.html">DataProducer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapExprNode.html">IterMapExprNode</a> (<a class="el" href="namespacetvm_1_1arith.h [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ADTObj.html">ADTObj</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DataProducerNode.html">DataProducerNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapResult.html">IterMapResult</a> (<a class="el" href="namespacetvm_ [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AffineGridAttrs.html">AffineGridAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1DataType.html">DataType</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMapResultNode.html">IterMapResultNode</a> (<a class="el" hr [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AffineType.html">AffineType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DataTypePattern.html">DataTypePattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMark.html">IterMark</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a> [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AffineTypeNode.html">AffineTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DataTypePatternNode.html">DataTypePatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterMarkNode.html">IterMarkNode</a> (<a class="el" href="namespacetvm_1_1ar [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AllClassNonMaximumSuppressionAttrs.html">AllClassNonMaximumSuppressionAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DebugAttrs.html">DebugAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterSplitExpr.html">IterSp [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Allocate.html">Allocate</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DeclBuffer.html">DeclBuffer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterSplitExprNode.html">IterSplitExprNode</a> (<a class="el" href="namespacetvm_1_1arith.htm [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AllocateConst.html">AllocateConst</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DeclBufferNode.html">DeclBufferNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterSumExpr.html">IterSumExpr</a> (<a class="el" href="namespacetvm_1_1ari [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AllocateConstNode.html">AllocateConstNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DeformableConv2DAttrs.html">DeformableConv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IterSumExprNode.html">IterSumExprNode</a> (<a [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1usmp_1_1AllocatedPoolInfo.html">AllocatedPoolInfo</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DenseAttrs.html">DenseAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IterVar.html">IterVar</a> (<a class="el" href=" [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1tir_1_1usmp_1_1AllocatedPoolInfoNode.html">AllocatedPoolInfoNode</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1DenseMapNode.html">DenseMapNode</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarAttr.html">IterVarAttr< [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AllocateNode.html">AllocateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DensePackAttrs.html">DensePackAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarAttrNode.html">IterVarAttrNode</a> (<a class="el" href="namespace [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Allocator.html">Allocator</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Dependency.html">Dependency</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IterVarNode.html">IterVarNode</a> (<a class="el" href="namespace [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AllocStorageAttrs.html">AllocStorageAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DependencyNode.html">DependencyNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarRelation.html">IterVarRelation</a> (<a class="el" href= [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AllocTensorAttrs.html">AllocTensorAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1qnn_1_1DequantizeAttrs.html">DequantizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay_1_1qnn.html">tvm::relay::qnn</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1IterVarRelationNode.html">IterVarRel [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AltPattern.html">AltPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1DeviceAPI.html">DeviceAPI</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_l"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">& [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1Profiler.html">Profiler</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1StoreNode.html">StoreNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AltPatternNode.html">AltPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DeviceCopyAttrs.html">DeviceCopyAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ProfilerNode.html">ProfilerNode</a> (<a class="e [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1Analyzer.html">Analyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1DeviceWrapper.html">DeviceWrapper</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1L2NormalizeAttrs.html">L2Norma [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1And.html">And</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1profiling_1_1DeviceWrapperNode.html">DeviceWrapperNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDoc.html">LambdaDoc< [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AndNode.html">AndNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPattern.html">DFPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1LambdaDocNode.html">LambdaDocNode</a> (<a class="el" href="namespacetvm_1_1scr [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AnnotationStep.html">AnnotationStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternCallback.html">DFPatternCallback</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LayerNormAttrs.html">Lay [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AnnotationStepNode.html">AnnotationStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternCallbackNode.html">DFPatternCallbackNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Layout.html [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Any.html">Any</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor.html">DFPatternFunctor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LayoutAxis.html">LayoutAxis</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::ti [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AnyNode.html">AnyNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor_3_01R_07const_01DFPattern_01_6n_00_01Args_8_8_8_08_4.html">DFPatternFunctor&lt; R(const DFPattern &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="c [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ArangeAttrs.html">ArangeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternNode.html">DFPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LayoutTransformAttrs.html">LayoutTransformAttrs</a> (<a class="el" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ArgInfo.html">ArgInfo</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DFPatternVisitor.html">DFPatternVisitor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LE.html">LE</a> (<a class="el" href="namespace [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ArgInfoNode.html">ArgInfoNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Diagnostic.html">Diagnostic</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LeakyReluAttrs.html">LeakyReluAttrs</a> (<a class="el" href="namespacetv [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ArgsortAttrs.html">ArgsortAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticContext.html">DiagnosticContext</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1tir_1_1LENode.html">LENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a> [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Array.html">Array</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticContextNode.html">DiagnosticContextNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Let.html">Let</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;& [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ArrayAccessor.html">ArrayAccessor</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticNode.html">DiagnosticNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Let.html">Let</a> (<a class="el" href="namespac [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ArrayAccessor_3_01const_01char_01_5_00_01_1_1tvm_1_1runtime_1_1String_01_4.html">ArrayAccessor&lt; const char *, ::tvm::runtime::String &gt;</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticRenderer.html">DiagnosticRenderer</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;& [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1ArrayHandler.html">SimpleObjAllocator::ArrayHandler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticRendererNode.html">DiagnosticRendererNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetNode.html">LetNode</a> [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1ArrayIndexPath.html">ArrayIndexPath</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DictAttrs.html">DictAttrs</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetPattern.html">LetPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td>< [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1ArrayIndexPathNode.html">ArrayIndexPathNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DictAttrsNode.html">DictAttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetPatternNode.html">LetPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>) [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ArrayIterator.html">ArrayIterator</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDoc.html">DictDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetS [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ArrayNode.html">ArrayNode</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDocNode.html">DictDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetStmtNode.html">LetStmtNode</a> (<a  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDoc.html">AssertDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DilateAttrs.html">DilateAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1support_1_1LinearCongruentialEngine.html">LinearCo [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDocNode.html">AssertDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Dilation2DAttrs.html">Dilation2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDoc.html" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AssertStmt.html">AssertStmt</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Div.html">Div</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDocNode.html">ListDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.h [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AssertStmtNode.html">AssertStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DivNode.html">DivNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDoc.html">LiteralDoc</a> (<a class="el" href="namespacetvm_1_1scrip [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1TaskSchedulerNode.html">TaskSchedulerNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDoc.html">AssignDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html">Doc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDocNo [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDocNode.html">AssignDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1DocNode.html">DocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Load.html" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMap.html">AttachMap</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DominatorPattern.html">DominatorPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoadNode.html">LoadNode</a> (<a class=" [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Tensor.html">Tensor</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMapNode.html">AttachMapNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DominatorPatternNode.html">DominatorPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilde [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDoc.html">AttrAccessDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DropoutAttrs.html">DropoutAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilderNode.html [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDocNode.html">AttrAccessDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1DurationNode.html">DurationNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="cl [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrDocEntry.html">AttrDocEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DynExpandDimsAttrs.html">DynExpandDimsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalRunnerNode.html">LocalRunnerNode</a>  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrDocVisitor.html">AttrDocVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_e"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;e&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoopRV.html">LoopRV</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1RandomModelNode.html">RandomModelNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1te_1_1TensorDom.html">TensorDom</a> (<a class="el" href="n [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1AttrError.html">AttrError</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoopRVNode.html">LoopRVNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Range.html">Range</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a clas [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrExistVisitor.html">AttrExistVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1EinsumAttrs.html">EinsumAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LRNAttrs.html">LRNAttrs</a> (<a class="el" href="namespac [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfo.html">AttrFieldInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFunc.html">EnvFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LT.html">LT</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfoNode.html">AttrFieldInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFuncNode.html">EnvFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LTNode.html">LTNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td v [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttributeAccessPath.html">AttributeAccessPath</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EQ.html">EQ</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_m"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;m&#160;&#160;</div></t [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1RebaseNode.html">RebaseNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1TensorIntrin.html">TensorIntrin</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttributeAccessPathNode.html">AttributeAccessPathNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EQNode.html">EQNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1RecClosure.html">RecClosure</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::rel [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrInitEntry.html">AttrInitEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1ErrorBuilder.html">ErrorBuilder</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Map.html">Map</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrInitVisitor.html">AttrInitVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ErrorReporter.html">ErrorReporter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1MapNode.html">MapNode</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNonDefaultVisitor.html">AttrNonDefaultVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Evaluate.html">Evaluate</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MapValuePath.html">MapValuePath</a> (<a class="el" href="namespacetvm. [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrNopEntry.html">AttrNopEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EvaluateNode.html">EvaluateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MapValuePathNode.html">MapValuePathNode</a> (<a class="el" href="namespacetvm.h [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNormalVisitor.html">AttrNormalVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Executable.html">Executable</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Match.html">Match</a> (<a class="el"  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AttrPattern.html">AttrPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Executor.html">Executor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MatchBufferRegion.html">MatchBufferRegion</a> (<a class="el" href="namespacetvm_ [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AttrPatternNode.html">AttrPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExecutorNode.html">ExecutorNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MatchBufferRegionNode.html">MatchBufferRegionNode</a> (<a class= [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrRegistry.html">AttrRegistry</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExecutorRegEntry.html">ExecutorRegEntry</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MatchNode.html">MatchNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::r [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrRegistryMap.html">AttrRegistryMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ExpandDimsAttrs.html">ExpandDimsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MatmulAttrs.html">MatmulAttrs</a> (<a class="el" href="namespacetvm_1_1relay.ht [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrRegistryMapContainerMap.html">AttrRegistryMapContainerMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1tir_1_1ExprDeepEqual.html">ExprDeepEqual</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MatrixSetDiagAttrs.html">MatrixSetDiagAttrs</a> (<a class="el" hr [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1Attrs.html">Attrs</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDoc.html">ExprDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Max.html">Max</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;& [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrsNode.html">AttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDocNode.html">ExprDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MaxNode.html">MaxNode</a> (<a class="el" href="namespacetvm_1_1tir.ht [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSEqualVisitor.html">AttrsSEqualVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor.html">ExprFunctor</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool1DAttrs.html">MaxPool1DAttrs</a> (<a class="el" href= [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSHashVisitor.html">AttrsSHashVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html">ExprFunctor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html">MaxPool2DAttrs</a> (<a class="el" h [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmt.html">AttrStmt</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_ [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmtNode.html">AttrStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_ [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrTriggerNonDefaultEntry.html">AttrTriggerNonDefaultEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureCallback.html">MeasureCallb [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1AttrVisitor.html">AttrVisitor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCallbackNode.html">MeasureCallbackNode</a> (<a class="el" href="namespacetvm_1_1m [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AutoSchedulerLayoutTransformAttrs.html">AutoSchedulerLayoutTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprPattern.html">ExprPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureCallbackNo [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool1DAttrs.html">AvgPool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprPatternNode.html">ExprPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCandidate.html">MeasureCandidate</a> (<a  [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool2DAttrs.html">AvgPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprRewriter.html">ExprRewriter</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCandidateNode.html">MeasureCandidateNode</a> (< [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool3DAttrs.html">AvgPool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDoc.html">ExprStmtDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureInput.html">Me [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ArgReduceAttrs.html">ArgReduceAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticBuilder.html">DiagnosticBuilder</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1tir_1_1LENode.html">LENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ArgsortAttrs.html">ArgsortAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticContext.html">DiagnosticContext</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Let.html">Let</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160; [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Array.html">Array</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticContextNode.html">DiagnosticContextNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Let.html">Let</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)& [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ArrayAccessor.html">ArrayAccessor</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticNode.html">DiagnosticNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetNode.html">LetNode</a> (<a class="el" href=" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ArrayAccessor_3_01const_01char_01_5_00_01_1_1tvm_1_1runtime_1_1String_01_4.html">ArrayAccessor&lt; const char *, ::tvm::runtime::String &gt;</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticRenderer.html">DiagnosticRenderer</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;& [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1ArrayHandler.html">SimpleObjAllocator::ArrayHandler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DiagnosticRendererNode.html">DiagnosticRendererNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetPattern.html">LetPatte [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1ArrayIndexPath.html">ArrayIndexPath</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DictAttrs.html">DictAttrs</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1LetPatternNode.html">LetPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#16 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1ArrayIndexPathNode.html">ArrayIndexPathNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1DictAttrsNode.html">DictAttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetStmt.html">LetStmt</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ArrayIterator.html">ArrayIterator</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDoc.html">DictDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LetS [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ArrayNode.html">ArrayNode</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1DictDocNode.html">DictDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1support_1_1LinearCongruentialEngine.html">Lin [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDoc.html">AssertDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DilateAttrs.html">DilateAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDoc.html">ListDoc</a> (<a [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssertDocNode.html">AssertDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Dilation2DAttrs.html">Dilation2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ListDocNode.h [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AssertFrameNode.html">AssertFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1tir.html">tvm::script::ir_builder::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Div.html">Div</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDoc [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AssertStmt.html">AssertStmt</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1DivNode.html">DivNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1LiteralDocNode.html">LiteralDocNode</a> (<a class="el" href="namespacetvm_1_1scrip [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1TempExpr.html">TempExpr</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AssertStmtNode.html">AssertStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1Doc.html">Doc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Load.html">Load</a> (<a class="el" href="namespacetvm_1_ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDoc.html">AssignDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1DocNode.html">DocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoadNode.html">Loa [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AssignDocNode.html">AssignDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DominatorPattern.html">DominatorPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilder.h [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1TensorAffineType.html">TensorAffineType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMap.html">AttachMap</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1DominatorPatternNode.html">DominatorPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalBuilderNode.ht [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1AttachMapNode.html">AttachMapNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DropoutAttrs.html">DropoutAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1LocalRunner.html">LocalRun [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDoc.html">AttrAccessDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1DurationNode.html">DurationNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1AttrAccessDocNode.html">AttrAccessDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1DynExpandDimsAttrs.html">DynExpandDimsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoopRV.html"> [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrDocEntry.html">AttrDocEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_e"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;e&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LoopRVNode.html">LoopRVNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1RandomModelNode.html">RandomModelNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1TensorInfo.html">TensorInfo< [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrDocVisitor.html">AttrDocVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1LRNAttrs.html">LRNAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Range.html">Range</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;& [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1AttrError.html">AttrError</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1EinsumAttrs.html">EinsumAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LT.html">LT</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><t [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrExistVisitor.html">AttrExistVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFunc.html">EnvFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1LTNode.html">LTNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#16 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfo.html">AttrFieldInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1EnvFuncNode.html">EnvFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_m"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;m&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Rebase.html">Rebase</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1TensorIntrin.html">TensorIntrin</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrFieldInfoNode.html">AttrFieldInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EQ.html">EQ</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1RebaseNode.html">RebaseNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttributeAccessPath.html">AttributeAccessPath</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EQNode.html">EQNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Map.html">Map</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttributeAccessPathNode.html">AttributeAccessPathNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1ErrorBuilder.html">ErrorBuilder</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1MapNode.html">MapNode</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a> [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrInitEntry.html">AttrInitEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ErrorReporter.html">ErrorReporter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MapValuePath.html">MapValuePath</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#16 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrInitVisitor.html">AttrInitVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Evaluate.html">Evaluate</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MapValuePathNode.html">MapValuePathNode</a> (<a class="el" href="namespacetvm.html [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNonDefaultVisitor.html">AttrNonDefaultVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1EvaluateNode.html">EvaluateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Match.html">Match</a> (<a class="el" href="namespacet [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrNopEntry.html">AttrNopEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1Executable.html">Executable</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MatchBufferRegion.html">MatchBufferRegion</a> (< [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrNormalVisitor.html">AttrNormalVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Executor.html">Executor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MatchBufferRegionNode.html">MatchBufferRegionNode</a> (<a class=" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AttrPattern.html">AttrPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExecutorNode.html">ExecutorNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MatchNode.html">MatchNode</a> (<a class="el" href="namespacetvm_1_1rel [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1AttrPatternNode.html">AttrPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExecutorRegEntry.html">ExecutorRegEntry</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MatmulAttrs.html">MatmulAttrs</a> (<a class="el" href [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrRegistry.html">AttrRegistry</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ExpandDimsAttrs.html">ExpandDimsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MatrixSetDiagAttrs.html">MatrixSetDiagAttrs</a> (<a class="el" href="namespacetvm_1_1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrRegistryMap.html">AttrRegistryMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1tir_1_1ExprDeepEqual.html">ExprDeepEqual</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Max.html">Max</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&# [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrRegistryMapContainerMap.html">AttrRegistryMapContainerMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDoc.html">ExprDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MaxNode.html">MaxNode</a> (<a class="el"  [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1Attrs.html">Attrs</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprDocNode.html">ExprDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool1DAttrs.html">MaxPool1DAttrs</a> (<a class="el" href="namespacetvm_ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrsNode.html">AttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor.html">ExprFunctor</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html">MaxPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&# [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSEqualVisitor.html">AttrsSEqualVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html">ExprFunctor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MaxPool3DAttrs.html">MaxPool3DAttrs</a> (<a class="el" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1detail_1_1AttrsSHashVisitor.html">AttrsSHashVisitor</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmt.html">AttrStmt</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprFunctor_3_01R_07const_01PrimExpr_01_6n_00_01Args_8_8_8_08_4.html">ExprFunctor&lt; R(const PrimExpr &amp;n, Args...)&gt;</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto_ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1AttrStmtNode.html">AttrStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCallbackNode.html">MeasureCallbackNode</a> (<a class="el" href="na [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1detail_1_1AttrTriggerNonDefaultEntry.html">AttrTriggerNonDefaultEntry</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprMutator.html">ExprMutator</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureCallbackNode.html">Me [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1AttrVisitor.html">AttrVisitor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprPattern.html">ExprPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCandidate.html">MeasureCandidate</a> (<a class="el" href="namespacetvm_1_1meta__s [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AutoSchedulerLayoutTransformAttrs.html">AutoSchedulerLayoutTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprPatternNode.html">ExprPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MeasureCan [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool1DAttrs.html">AvgPool1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprRewriter.html">ExprRewriter</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureInput.html">MeasureInput</a> (<a class="el" hr [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool2DAttrs.html">AvgPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDoc.html">ExprStmtDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureInputNode.html [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1AvgPool3DAttrs.html">AvgPool3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDocNode.html">ExprStmtDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureResult [...]
 <tr><td rowspan="2" valign="bottom"><a name="letter_b"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;b&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ExprStmtDocNode.html">ExprStmtDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureInputNode.html">MeasureInputNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_ [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html">ExprVisitor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureResult.html">MeasureResult</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1RelayRefType.html">RelayRefType</a> (<a class="e [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseAttrsNode.html">BaseAttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html">ExprVisitor</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureResultNode.html">MeasureResultNode</a> (<a class="el" href="namespacetvm_1_1auto__ [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1BaseComputeOpNode.html">BaseComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1ExternOp.html">ExternOp</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MemoryInfo.html">MemoryInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;& [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseExpr.html">BaseExpr</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1ExternOpNode.html">ExternOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MemoryInfoNode.html">MemoryInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td vali [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseExprNode.html">BaseExprNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ExtractedTask.html">ExtractedTask</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1MemoryManager.html">MemoryManager</a> (<a class="el" href [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseFunc.html">BaseFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ExtractedTaskNode.html">ExtractedTaskNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structMemoryManagerInterface.html">MemoryManagerInterface</a>&#160;&#160;&#160;</td><t [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseFuncNode.html">BaseFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncObj_1_1Extractor.html">PackedFuncObj::Extractor</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MeshgridAttrs.html">MeshgridAttrs</a> (<a class="el" href=" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseTensorType.html">BaseTensorType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_f"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;f&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1Metadata.html">Metadata</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ReprPrinter.html">ReprPrinter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1TransformNode.html">TransformNode</a> (<a class="el" href="namesp [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseTensorTypeNode.html">BaseTensorTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataArray.html">MetadataArray</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1qnn_1_1RequantizeAttrs.html">Requantiz [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseValueEqual.html">BaseValueEqual</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1FeatureExtractor.html">FeatureExtractor</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataArrayNode.html">MetadataArrayNode [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1BaseValueHash.html">BaseValueHash</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1FeatureExtractorNode.html">FeatureExtractorNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataBase.html">MetadataBase</a> [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BatchMatmulAttrs.html">BatchMatmulAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FeatureSet.html">FeatureSet</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataBaseNode.html">MetadataBaseNode</a> (<a c [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BatchNormAttrs.html">BatchNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1FIFOBufferAttrs.html">FIFOBufferAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrame.html">MetadataFrame</a> (<a cl [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BatchToSpaceNDAttrs.html">BatchToSpaceNDAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1FixedPointMultiplyAttrs.html">FixedPointMultiplyAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrameNode. [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BiasAddAttrs.html">BiasAddAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1SeqStmt_1_1Flattener.html">SeqStmt::Flattener</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataNode.html">MetadataNode</a> (<a class [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BijectiveLayout.html">BijectiveLayout</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FloatImm.html">FloatImm</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MetaScheduleLayoutTransformAttrs.html">MetaScheduleLayoutTransformAttrs</a> (<a class="el" href="nam [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BijectiveLayoutNode.html">BijectiveLayoutNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FloatImmNode.html">FloatImmNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1MetricCollector.html">MetricCollector</a> (<a class="el" href="namespa [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BinaryConv2DAttrs.html">BinaryConv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorDiv.html">FloorDiv</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1MetricCollectorNode.html">MetricCollectorNode</a> (<a cl [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BinaryDenseAttrs.html">BinaryDenseAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorDivNode.html">FloorDivNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Min.html">Min</a> (<a class="el" href="namespacetvm_1_1tir.html">tv [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BinaryOpNode.html">BinaryOpNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorMod.html">FloorMod</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MinNode.html">MinNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#16 [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BitPackAttrs.html">BitPackAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorModNode.html">FloorModNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MirrorPadAttrs.html">MirrorPadAttrs</a> (<a class="el" href="namespacetv [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Block.html">Block</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStep.html">FollowFusedSplitStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MissingArrayElementPath.html">MissingArrayElementPat [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1tir_1_1BlockInfo.html">BlockInfo</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStepNode.html">FollowFusedSplitStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MissingArrayElementPathNode.html">M [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockNode.html">BlockNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowSplitStep.html">FollowSplitStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MissingMapEntryPath.html">MissingMapEntryPath</a> (<a  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockRealize.html">BlockRealize</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowSplitStepNode.html">FollowSplitStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MissingMapEntryPathNode.html">MissingMap [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockRealizeNode.html">BlockRealizeNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1For.html">For</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html">MixedModeMutator</a> (<a class="el" href="namespacetvm_1_1relay.htm [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockRV.html">BlockRV</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ForDoc.html">ForDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MixedModeVisitor.html">MixedModeVisitor</a> (<a class="el" hre [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockRVNode.html">BlockRVNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ForDocNode.html">ForDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Mod.html">Mod</a> (<a class="el" href="namespace [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockScope.html">BlockScope</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ForNode.html">ForNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ModNode.html">ModNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#16 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockScopeNode.html">BlockScopeNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1Frame.html">Frame</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSet.html">ModularSet</a> (<a class="el" hre [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1Bool.html">Bool</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1FrameBuffer.html">FrameBuffer</a> (<a class="el" href="namespacetvm_1_1runtime_1_1micro__rpc.html">tvm::runtime::micro_rpc</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSetAnalyzer.html">ModularSetAnalyzer</a> (<a class="el" hre [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Broadcast.html">Broadcast</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1FrameNode.html">FrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSetNode.html">ModularSetNode</a> (<a class="e [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1qnn_1_1BroadcastAttrs.html">BroadcastAttrs</a> (<a class="el" href="namespacetvm_1_1relay_1_1qnn.html">tvm::relay::qnn</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1Framer.html">Framer</a> (<a class="el" href="namespacetvm_1_1runtime_1_1micro__rpc.html">tvm::runtime::micro_rpc</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Module.ht [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BroadcastNode.html">BroadcastNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1StringObj_1_1FromStd.html">StringObj::FromStd</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ModuleNode.html">ModuleNode</a> (<a class="el" hre [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1vm_1_1Buffer.html">Buffer</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ShapeTupleObj_1_1FromStd.html">ShapeTupleObj::FromStd</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Mul.html">Mul</a> (<a class="el [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Function.html">Function</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MulNode.html">MulNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#16 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1usmp_1_1BufferInfo.html">BufferInfo</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDoc.html">FunctionDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MultiBoxPriorAttrs.html [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1usmp_1_1BufferInfoAnalysis.html">BufferInfoAnalysis</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDocNode.html">FunctionDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_ [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1tir_1_1usmp_1_1BufferInfoAnalysisNode.html">BufferInfoAnalysisNode</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FunctionNode.html">FunctionNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MultinomialAttrs.html">Multino [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1tir_1_1usmp_1_1BufferInfoNode.html">BufferInfoNode</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FunctionPattern.html">FunctionPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1Mutator.html">Mutator</a> (<a cl [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferLoad.html">BufferLoad</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FunctionPatternNode.html">FunctionPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MutatorNode.html">MutatorNode</a> (<a class="el" href=" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferLoadNode.html">BufferLoadNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FuncType.html">FuncType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_n"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;n&#160;&#160;</div>< [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1RuntimeRegEntry.html">RuntimeRegEntry</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structTVMModule.html">TVMModule</a>&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferNode.html">BufferNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FuncTypeNode.html">FuncTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_s"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;s&#160;&#160;</div>< [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1TVMMovableArgValue__.html">TVMMovableArgValue_</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">BufferRealize</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Fuse.html">Fuse</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1details_1_1Namer.html">Namer</a> (<a class="el" href="namespacetvm_1_1script_1_1i [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1FuseNode.html">FuseNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NameSupply.html">NameSupply</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#16 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRegion.html">BufferRegion</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStep.html">FuseStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NameSupplyNode.html">NameSupplyNode</a> (<a class="el" href="n [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRegionNode.html">BufferRegionNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStepNode.html">FuseStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray.html">NDArray</a> (<a class [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_g"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;g&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="structtvm_1_1NDArrayContainerTrait.html">NDArrayContainerTrait</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ScatterAddAttrs.html">ScatterAddAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structTVMParallelGroupEnv.html">TVMParallelGroupEnv</a>&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html">BufferStoreNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NdarraySizeAttrs.html">NdarraySizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ScatterAttrs.html">ScatterAttrs</a> (<a class="el" href="n [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1Builder.html">Builder</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GatherAttrs.html">GatherAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NE.html">NE</a> (<a class="el" href="namespacetvm_1_1ti [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInput.html">BuilderInput</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GatherNDAttrs.html">GatherNDAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NENode.html">NENode</a> (<a class="el" hr [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInputNode.html">BuilderInputNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GE.html">GE</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NLLLossAttrs.html">NLLLossAttrs</a> (<a class="el" href="na [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderNode.html">BuilderNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFunc.html">GenericFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NodeFunctor.html">NodeFunctor</a> (<a class="el" href="namespacetvm.html">tvm</a [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderResult.html">BuilderResult</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFuncNode.html">GenericFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NodeFunctor_3_01R_07const_01ObjectRef_01_6n_00_01Args_8_8_8_08_4.htm [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderResultNode.html">BuilderResultNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GENode.html">GENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NonMaximumSuppressionAttrs.html">NonMaximumSuppre [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1BuildResult.html">BuildResult</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GetValidCountsAttrs.html">GetValidCountsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NormalAttrs.html">NormalA [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1BuildResultNode.html">BuildResultNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GlobalPool2DAttrs.html">GlobalPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Not.html">Not</a> (<a cl [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ExprVisitor.html">ExprVisitor</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1MeasureResultNode.html">MeasureResultNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1RelayRefType.html">RelayRefType</a> (<a class [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html">ExprVisitor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MemoryInfo.html">MemoryInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1RelayRefTypeNode.html">RelayRefTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#16 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseAttrsNode.html">BaseAttrsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1ExternOp.html">ExternOp</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MemoryInfoNode.html">MemoryInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td va [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1BaseComputeOpNode.html">BaseComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1ExternOpNode.html">ExternOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1vm_1_1MemoryManager.html">MemoryManager</a> (<a class="el" href="namespace [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseExpr.html">BaseExpr</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ExtractedTask.html">ExtractedTask</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structMemoryManagerInterface.html">MemoryManagerInterface</a>&#160;&#160;&#160;</td><td valign [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseExprNode.html">BaseExprNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ExtractedTaskNode.html">ExtractedTaskNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MeshgridAttrs.html">MeshgridAttrs</a> (<a class="el" hre [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseFunc.html">BaseFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncObj_1_1Extractor.html">PackedFuncObj::Extractor</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1Metadata.html">Metadata</a> (<a class="el" href="names [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseFuncNode.html">BaseFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_f"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;f&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataArray.html">MetadataArray</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ReprPrinter.html">ReprPrinter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Transform.html">Transform</a> (<a class="el" href="name [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseTensorType.html">BaseTensorType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataArrayNode.html">MetadataArrayNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1qnn_1_1RequantizeAttrs.html">Requantiz [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseTensorTypeNode.html">BaseTensorTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1FeatureExtractor.html">FeatureExtractor</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataBase.html">MetadataBase</ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseValueEqual.html">BaseValueEqual</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1FeatureExtractorNode.html">FeatureExtractorNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataBaseNode.html">MetadataBa [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1BaseValueHash.html">BaseValueHash</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FeatureSet.html">FeatureSet</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrame.html">MetadataFrame</a> (<a class="el" href="namespacetvm_1_1script_1 [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BatchMatmulAttrs.html">BatchMatmulAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1FIFOBufferAttrs.html">FIFOBufferAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1MetadataFrameNode.html">MetadataFrameNod [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BatchNormAttrs.html">BatchNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1FixedPointMultiplyAttrs.html">FixedPointMultiplyAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1MetadataNode.html">Metadat [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BatchToSpaceNDAttrs.html">BatchToSpaceNDAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1SeqStmt_1_1Flattener.html">SeqStmt::Flattener</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MetaScheduleLayoutTransformAttrs.html">MetaS [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BiasAddAttrs.html">BiasAddAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FloatImm.html">FloatImm</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1MetricCollector.html">MetricCollector</a> (<a class="el" href="namespacetvm_1_1runtim [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BijectiveLayout.html">BijectiveLayout</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FloatImmNode.html">FloatImmNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1MetricCollectorNode.html">MetricCollectorNode</a> (<a class="el" href="namespa [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BijectiveLayoutNode.html">BijectiveLayoutNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorDiv.html">FloorDiv</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Min.html">Min</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BinaryConv2DAttrs.html">BinaryConv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorDivNode.html">FloorDivNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MinNode.html">MinNode</a> (<a class="el" href="namespacetvm_1_1ti [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BinaryDenseAttrs.html">BinaryDenseAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorMod.html">FloorMod</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MirrorPadAttrs.html">MirrorPadAttrs</a> (<a class="el" href="namespacetv [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BinaryOpNode.html">BinaryOpNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1FloorModNode.html">FloorModNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MissingArrayElementPath.html">MissingArrayElementPath</a> (<a class="el" href="namespacet [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1BitPackAttrs.html">BitPackAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStep.html">FollowFusedSplitStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MissingArrayElementPathNode.htm [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Block.html">Block</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowFusedSplitStepNode.html">FollowFusedSplitStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MissingMapEntryPath.html">MissingMapEntryPat [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1tir_1_1BlockInfo.html">BlockInfo</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowSplitStep.html">FollowSplitStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1MissingMapEntryPathNode.html">MissingMapEntryPathNode [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockNode.html">BlockNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FollowSplitStepNode.html">FollowSplitStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html">MixedModeMutat [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockRealize.html">BlockRealize</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1For.html">For</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1MixedModeVisitor.html">MixedModeVisitor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm:: [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockRealizeNode.html">BlockRealizeNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ForDoc.html">ForDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Mod.html">Mod</a> (<a class="el" href="namespa [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockRV.html">BlockRV</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ForDocNode.html">ForDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ModNode.html">ModNode</a> (<a class="el" href="namespace [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockRVNode.html">BlockRVNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ForNode.html">ForNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSet.html">ModularSet</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith< [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockScope.html">BlockScope</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1Frame.html">Frame</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSetAnalyzer.html">ModularSetAnalyzer</a> (<a class= [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BlockScopeNode.html">BlockScopeNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1FrameBuffer.html">FrameBuffer</a> (<a class="el" href="namespacetvm_1_1runtime_1_1micro__rpc.html">tvm::runtime::micro_rpc</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ModularSetNode.html">Modula [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1Bool.html">Bool</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1FrameNode.html">FrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Module.html">Module</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm: [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Broadcast.html">Broadcast</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1Framer.html">Framer</a> (<a class="el" href="namespacetvm_1_1runtime_1_1micro__rpc.html">tvm::runtime::micro_rpc</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ModuleNode.html">ModuleNode</a> (<a class="el [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1qnn_1_1BroadcastAttrs.html">BroadcastAttrs</a> (<a class="el" href="namespacetvm_1_1relay_1_1qnn.html">tvm::relay::qnn</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ShapeTupleObj_1_1FromStd.html">ShapeTupleObj::FromStd</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Mul.html">Mul</a>  [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BroadcastNode.html">BroadcastNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1StringObj_1_1FromStd.html">StringObj::FromStd</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1MulNode.html">MulNode</a> (<a class="el" href="namespa [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1vm_1_1Buffer.html">Buffer</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Function.html">Function</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MultiBoxPriorAttrs.html">MultiBoxPriorAttrs</a> (<a class="el" h [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Buffer.html">Buffer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDoc.html">FunctionDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MultiBoxTransformLocAttrs.html">MultiBoxTransformLocA [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1usmp_1_1BufferInfo.html">BufferInfo</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1FunctionDocNode.html">FunctionDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1MultinomialAttr [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1usmp_1_1BufferInfoAnalysis.html">BufferInfoAnalysis</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FunctionNode.html">FunctionNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1Mutator.html">Mutator</a> (<a c [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1tir_1_1usmp_1_1BufferInfoAnalysisNode.html">BufferInfoAnalysisNode</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FunctionPattern.html">FunctionPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1MutatorNode.html [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1tir_1_1usmp_1_1BufferInfoNode.html">BufferInfoNode</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp.html">tvm::tir::usmp</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1FunctionPatternNode.html">FunctionPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_n"></a><table border="0" cellspacing="0" ce [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1RuntimeNode.html">RuntimeNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structTVMGraphExecutorGraphAttr.html">TVMGraphExecutorGraphAttr</a>&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferLoad.html">BufferLoad</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FuncType.html">FuncType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1RuntimeRegEntry.html">RuntimeRegEntry</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferLoadNode.html">BufferLoadNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1FuncTypeNode.html">FuncTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1details_1_1Namer.html">Namer</a> (<a class="el" href="namespacetvm_1_1script_1_ [...]
+</td><td valign="top"><a class="el" href="structTVMModule.html">TVMModule</a>&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferNode.html">BufferNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Fuse.html">Fuse</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NameSupply.html">NameSupply</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valig [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRealize.html">BufferRealize</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1FuseNode.html">FuseNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NameSupplyNode.html">NameSupplyNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#16 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRealizeNode.html">BufferRealizeNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStep.html">FuseStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray.html">NDArray</a> (<a class="el"  [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRegion.html">BufferRegion</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1FuseStepNode.html">FuseStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1NDArrayContainerTrait.html">NDArrayContainerTrait</a> [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferRegionNode.html">BufferRegionNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_g"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;g&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NdarraySizeAttrs.html">NdarraySizeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ScatterAddAttrs.html">ScatterAddAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structTVMPackedFunc.html">TVMPackedFunc</a>&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferStore.html">BufferStore</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NE.html">NE</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ScatterAttrs.html">ScatterAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)& [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1BufferStoreNode.html">BufferStoreNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GatherAttrs.html">GatherAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NENode.html">NENode</a> (<a class="el" href="namespacetvm_1_1tir.html"> [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1Builder.html">Builder</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GatherNDAttrs.html">GatherNDAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NLLLossAttrs.html">NLLLossAttrs</a> (<a class="e [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInput.html">BuilderInput</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GE.html">GE</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NodeFunctor.html">NodeFunctor</a> (<a class="el" href="namespacetvm.html">tvm [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderInputNode.html">BuilderInputNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFunc.html">GenericFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1NodeFunctor_3_01R_07const_01ObjectRef_01_6n_00_01Args_8_8_8_08_4.html" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderNode.html">BuilderNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GenericFuncNode.html">GenericFuncNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NonMaximumSuppressionAttrs.html">NonMaximumSuppressionAttrs</a [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderResult.html">BuilderResult</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GENode.html">GENode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1NormalAttrs.html">NormalAttrs</a> (<a class="el" href="na [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1BuilderResultNode.html">BuilderResultNode</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GetValidCountsAttrs.html">GetValidCountsAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Not.html">Not</a> ( [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1BuildResult.html">BuildResult</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GlobalPool2DAttrs.html">GlobalPool2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NotNode.html">NotNode</a> (<a cl [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1BuildResultNode.html">BuildResultNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalTypeVar.html">GlobalTypeVar</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1NullOptType.html">NullOptType</a> (<a class="el" href [...]
 <tr><td rowspan="2" valign="bottom"><a name="letter_c"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;c&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1GlobalTypeVar.html">GlobalTypeVar</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1NotNode.html">NotNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ScheduleStateNode.html">ScheduleStateNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&# [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1GlobalTypeVarNode.html">GlobalTypeVarNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1NullOptType.html">NullOptType</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDoc.html">ScopeDoc</a> (<a class="el" href="namespacetvm_1_1s [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheReadStep.html">CacheReadStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classGlobalVar.html">GlobalVar</a>&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_o"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;o&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDocNode.html">ScopeDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypedEnvFunc.html">TypedEnvFunc</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheReadStepNode.html">CacheReadStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVar.html">GlobalVar</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1ScopedTimer.html">ScopedTimer</a> (<a class="el" hr [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheWriteStep.html">CacheWriteStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVarNode.html">GlobalVarNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjAllocatorBase.html">ObjAllocatorBase</a> (<a class="e [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheWriteStepNode.html">CacheWriteStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVarSupply.html">GlobalVarSupply</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Object.html">Object</a> (<a class="el" href= [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Call.html">Call</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVarSupplyNode.html">GlobalVarSupplyNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectEqual.html">ObjectEqual</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime< [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Call.html">Call</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1usmp_1_1algo_1_1GreedyBase.html">GreedyBase</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp_1_1algo.html">tvm::tir::usmp::algo</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectHash.html">ObjectHash</a> (<a class="el"  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1CallDoc.html">CallDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GridSampleAttrs.html">GridSampleAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ObjectPath.html">ObjectPath</a> (<a class="el" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1CallDocNode.html">CallDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GroupNormAttrs.html">GroupNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ObjectPathNode.html">ObjectPathNode</a>  [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1profiling_1_1CallFrame.html">CallFrame</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GT.html">GT</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ObjectPathPair.html">ObjectPathPair</a> (<a class="el" href="name [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CallLoweredAttrs.html">CallLoweredAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GTNode.html">GTNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ObjectPathPairNode.html">ObjectPathPairNode</a> (<a class="el" href="namespacetvm.html [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_h"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;h&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectPtr.html">ObjectPtr</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchTask.html">SearchTask</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01DataType_01_4.html">TypeNam [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectPtrEqual.html">ObjectPtrEqual</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchTaskNode.html">SearchTaskNode</a> (<a class="el"  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPattern.html">CallPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html">SimpleObjAllocator::Handler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectPtrHash.html">ObjectPtr [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPatternNode.html">CallPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html">SHashReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectRef.html">ObjectRef</a> (<a class="el" href="namespacetvm_1 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1CanonicalSimplifier.html">CanonicalSimplifier</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html">SEqualReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker.html">ObjectTypeChecker</a> (<a clas [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Cast.html">Cast</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDataType_01_4.html">Handler&lt; DLDataType &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker_3_01Array_3_01T_01_4 [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CastAttrs.html">CastAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDevice_01_4.html">Handler&lt; DLDevice &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker_3_01Map [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CastHintAttrs.html">CastHintAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1HardwareParams.html">HardwareParams</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1OnDeviceAttrs.html">OnDeviceAtt [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CastNode.html">CastNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1HardwareParamsNode.html">HardwareParamsNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1OneHotAttrs.html">OneHotAttrs</a> (<a cl [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDoc.html">ClassDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1HybridOp.html">HybridOp</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Op.html">Op</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#16 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDocNode.html">ClassDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1HybridOpNode.html">HybridOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpAttrMap.html">OpAttrMap</a> (<a class="el" href="n [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Clause.html">Clause</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_i"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;i&#160;&#160;</div></td></tr></table>
-</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Operation.html">Operation</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1SeqStmtNode.html">SeqStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeReporter.html">TypeReporter</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#1 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ClauseNode.html">ClauseNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDoc.html">OperationDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SEqualReducer.html">SEqualReducer</a> (<a class [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ClipAttrs.html">ClipAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Id.html">Id</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDocNode.html">OperationDocNode</a> (<a class="el" href="namespacetvm_1_1 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Closure.html">Closure</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IdDoc.html">IdDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1OperationNode.html">OperationNode</a> (<a class="el" hr [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ClosureObj.html">ClosureObj</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IdDocNode.html">IdDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html">OpImplementatio [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CmpOpNode.html">CmpOpNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IdNode.html">IdNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImplementationNode.html">OpImplementationNode</a> (<a class="el" href="namespacetvm_1_1rel [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1GlobalTypeVarNode.html">GlobalTypeVarNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_o"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;o&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDoc.html">ScopeDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeData.html">TypeData</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classGlobalVar.html">GlobalVar</a>&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ScopeDocNode.html">ScopeDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeDataNode.html">TypeDataNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheReadStep.html">CacheReadStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVar.html">GlobalVar</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjAllocatorBase.html">ObjAllocatorBase</a> (<a class="el" href="n [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheReadStepNode.html">CacheReadStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVarNode.html">GlobalVarNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Object.html">Object</a> (<a class="el" href="names [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheWriteStep.html">CacheWriteStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVarSupply.html">GlobalVarSupply</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectEqual.html">ObjectEqual</a> (<a class="el" hr [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1CacheWriteStepNode.html">CacheWriteStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1GlobalVarSupplyNode.html">GlobalVarSupplyNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectHash.html">ObjectHash</a> (<a [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Call.html">Call</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1usmp_1_1algo_1_1GreedyBase.html">GreedyBase</a> (<a class="el" href="namespacetvm_1_1tir_1_1usmp_1_1algo.html">tvm::tir::usmp::algo</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ObjectPath.html">ObjectPath</a> (<a class="el" href="namespacetvm [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Call.html">Call</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GridSampleAttrs.html">GridSampleAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ObjectPathNode.html">ObjectPathNode</a> (<a class="el" href="namespacetvm.html">tvm</a [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1CallDoc.html">CallDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1GroupNormAttrs.html">GroupNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ObjectPathPair.html">ObjectPathPair</a> (<a clas [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1CallDocNode.html">CallDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GT.html">GT</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ObjectPathPairNode.html">ObjectPathPairNode</a> (<a class="el" href="na [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1profiling_1_1CallFrame.html">CallFrame</a> (<a class="el" href="namespacetvm_1_1runtime_1_1profiling.html">tvm::runtime::profiling</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1GTNode.html">GTNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectPtr.html">ObjectPtr</a> (<a class="el" h [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CallLoweredAttrs.html">CallLoweredAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_h"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;h&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectPtrEqual.html">ObjectPtrEqual</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchTask.html">SearchTask</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1TypeName_3_01bool_01_4.html"> [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectPtrHash.html">ObjectPtrHash</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1SearchTaskNode.html">SearchTaskNode</a> (<a class="el" href="na [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallNode.html">CallNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1SimpleObjAllocator_1_1Handler.html">SimpleObjAllocator::Handler</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ObjectRef.html">ObjectRef</a> (<a cl [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPattern.html">CallPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html">SHashReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker.html">ObjectTypeChecker</a> (<a class="el" href="names [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1CallPatternNode.html">CallPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html">SEqualReducer::Handler</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1ObjectTypeChecker_3_01Array_3_01T_01_4_01_4.html">ObjectTypeCh [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1CanonicalSimplifier.html">CanonicalSimplifier</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDataType_01_4.html">Handler&lt; DLDataType &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1Ob [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Cast.html">Cast</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structdmlc_1_1serializer_1_1Handler_3_01DLDevice_01_4.html">Handler&lt; DLDevice &gt;</a> (<a class="el" href="namespacedmlc_1_1serializer.html">dmlc::serializer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1OnDeviceAttrs.html">OnDeviceAttrs</a> (<a cl [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CastAttrs.html">CastAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1HardwareParams.html">HardwareParams</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1OneHotAttrs.html">OneHotAttrs</a> (<a c [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CastHintAttrs.html">CastHintAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1HardwareParamsNode.html">HardwareParamsNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Op.html">Op</a> (<a class="el" hr [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CastNode.html">CastNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1HybridOp.html">HybridOp</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpAttrMap.html">OpAttrMap</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td val [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDoc.html">ClassDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1HybridOpNode.html">HybridOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Operation.html">Operation</a> (<a class="el" href="nam [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1ClassDocNode.html">ClassDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_i"></a><table border="0" cellspacing="0" cellpadding="0"><tr><td><div class="ah">&#160;&#160;i&#160;&#160;</div></td></tr></table>
+</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDoc.html">OperationDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1SeqStmtNode.html">SeqStmtNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1TypeRelationNode.html">TypeRelationNode</a> (<a cl [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Clause.html">Clause</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1OperationDocNode.html">OperationDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1SEqualReducer.html">SEqualReducer</a> (<a class [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ClauseNode.html">ClauseNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Id.html">Id</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1OperationNode.html">OperationNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a> [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ClipAttrs.html">ClipAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IdDoc.html">IdDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html">OpImplementation</a> (<a class [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Closure.html">Closure</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IdDocNode.html">IdDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpImplementationNode.html">OpImplementationN [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1ClosureObj.html">ClosureObj</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IdNode.html">IdNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpNode.html">OpNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CmpOpNode.html">CmpOpNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1If.html">If</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpRegEntry.html">OpRegEntry</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td va [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CommReducer.html">CommReducer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1If.html">If</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpNode.html">OpNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CommReducerNode.html">CommReducerNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IfDoc.html">IfDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1OpRegEntry.html">OpRegEntry</a> (<a class="el" href="name [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1CompilationConfig.html">CompilationConfig</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IfDocNode.html">IfDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpSpecialization.html">OpSpecialization</a> (<a class=" [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1CompilationConfigNode.html">CompilationConfigNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IfNode.html">IfNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpSpecializationNode.html">OpSpecializationNode</a> (<a class="el" href="namespacetvm_1 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1CompileError.html">CompileError</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IfPattern.html">IfPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpStrategy.html">OpStrategy</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#1 [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CompilerAttrs.html">CompilerAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IfPatternNode.html">IfPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpStrategyNode.html">OpStrategyNode</a> (<a class="el" href="na [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStep.html">ComputeAtStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IfThenElse.html">IfThenElse</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Optional.html">Optional</a> (<a class="el" hr [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html">ComputeAtStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IfThenElseNode.html">IfThenElseNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Or.html">Or</a> (<a class="el" hr [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAG.html">ComputeDAG</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce.html">ImplSEqualReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1OrNode.html">OrNode</a> (<a class [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CommReducer.html">CommReducer</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IfDoc.html">IfDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpSpecialization.html">OpSpecialization</a> (<a class="e [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1tir_1_1CommReducerNode.html">CommReducerNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IfDocNode.html">IfDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpSpecializationNode.html">OpSpecializat [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1CompilationConfig.html">CompilationConfig</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IfNode.html">IfNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpStrategy.html">OpStrategy</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a> [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1CompilationConfigNode.html">CompilationConfigNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IfPattern.html">IfPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1OpStrategyNode.html">OpStrategyNode</a> (<a class="el" href="namespacetvm_1_1rela [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1CompileError.html">CompileError</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1IfPatternNode.html">IfPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1Optional.html">Optional</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runti [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CompilerAttrs.html">CompilerAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IfThenElse.html">IfThenElse</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Or.html">Or</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)& [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStep.html">ComputeAtStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IfThenElseNode.html">IfThenElseNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1OrNode.html">OrNode</a> (<a class="el" hr [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeAtStepNode.html">ComputeAtStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce.html">ImplSEqualReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="letter_p"></a><table border="0" ce [...]
+</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Shuffle.html">Shuffle</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1UpSamplingAttrs.html">UpSamplingAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td></tr>
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAG.html">ComputeDAG</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce_3_01T_00_01true_01_4.html">ImplSEqualReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1ti [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAGNode.html">ComputeDAGNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSEqualReduce_3_01T_00_01true_01_4.html">ImplSEqualReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td rowspan="2" valign="bottom"><a name="le [...]
-</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1ShuffleNode.html">ShuffleNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStep.html">ComputeInlineStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce.html">ImplSHashReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1SignaturePrinter [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStepNode.html">ComputeInlineStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce_3_01T_00_01true_01_4.html">ImplSHashReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el"  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1ComputeOp.html">ComputeOp</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs.html">ImplVisitAttrs</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1PackedFuncObj.html">PackedFuncObj</a> (<a class="el" href="namespacetvm_1 [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1ComputeOpNode.html">ComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs_3_01T_00_01true_01_4.html">ImplVisitAttrs&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1PackedFuncSubObj.html">Pack [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeRootStep.html">ComputeRootStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IncompleteType.html">IncompleteType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html">PackedFuncValueConve [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeRootStepNode.html">ComputeRootStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IncompleteTypeNode.html">IncompleteTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Option [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConcatenateAttrs.html">ConcatenateAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDoc.html">IndexDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01P [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Constant.html">Constant</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDocNode.html">IndexDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Boo [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1ConstantInfo.html">ConstantInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IndexMap.html">IndexMap</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01tvm_1_1Integer_01_4.html">PackedFuncValueConverter&lt; tvm::Integer &gt;</a [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ConstantInfoMetadata.html">ConstantInfoMetadata</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IndexMapNode.html">IndexMapNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValue [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ConstantInfoMetadataNode.html">ConstantInfoMetadataNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1InitOpAttrs.html">InitOpAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1mic [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1ConstantInfoNode.html">ConstantInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1InplaceArrayBase.html">InplaceArrayBase</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1PadAttrs.html">PadAttrs</a> (<a class="el" href="namespacetvm_1_1rela [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1ConstantMemoryPools.html">ConstantMemoryPools</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1InstanceNormAttrs.html">InstanceNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1Pass.html">Pass</a> (<a class="el" href="namespacetvm_1_1transfo [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1ConstantMemoryPoolsNode.html">ConstantMemoryPoolsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Instruction.html">Instruction</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassContext.html">PassContext</a> (<a class="el" href="namespacetvm_1_1tra [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstantNode.html">ConstantNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1vm_1_1Instruction.html">Instruction</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassContextNode.html">PassContextNode</a> ( [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstantPattern.html">ConstantPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1InstructionKind.html">InstructionKind</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassInfo.html">PassInfo</a> (<a class="el" href="namespace [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstantPatternNode.html">ConstantPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1InstructionKindNode.html">InstructionKindNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassInfoNode.html">PassInfoNode</a> (<a cl [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1ConstantPoolInfo.html">ConstantPoolInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1InstructionKindRegEntry.html">InstructionKindRegEntry</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1instrument_1_1PassInstrument.html">PassInstrument</a> (<a class="el" href="nam [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1ConstantPoolInfoNode.html">ConstantPoolInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1InstructionNode.html">InstructionNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1instrument_1_1PassInstrumentNode.html">PassInstrumentNode</a> (<a class="el" href="na [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBound.html">ConstIntBound</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraints.html">IntConstraints</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassNode.html">PassNode</a> (<a class="el" href="namespace [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBoundAnalyzer.html">ConstIntBoundAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsNode.html">IntConstraintsNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Pattern.html">Pattern</a> (<a class="e [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBoundNode.html">ConstIntBoundNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransform.html">IntConstraintsTransform</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternConstructor.html">PatternCons [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstraintContext.html">ConstraintContext</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransformNode.html">IntConstraintsTransformNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternConstructorNode.html" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeDAGNode.html">ComputeDAGNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce.html">ImplSHashReduce</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1PackedFunc.html">Packed [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStep.html">ComputeInlineStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplSHashReduce_3_01T_00_01true_01_4.html">ImplSHashReduce&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="cl [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeInlineStepNode.html">ComputeInlineStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs.html">ImplVisitAttrs</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1PackedFuncS [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1ComputeOp.html">ComputeOp</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1ImplVisitAttrs_3_01T_00_01true_01_4.html">ImplVisitAttrs&lt; T, true &gt;</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter.html">Pac [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1te_1_1ComputeOpNode.html">ComputeOpNode</a> (<a class="el" href="namespacetvm_1_1te.html">tvm::te</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IncompleteType.html">IncompleteType</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01Optional_3_01T_01_4_01_4.html">PackedFuncValueConverter&lt; Opti [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeRootStep.html">ComputeRootStep</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IncompleteTypeNode.html">IncompleteTypeNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_01PrimExpr_01_4. [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1auto__scheduler_1_1ComputeRootStepNode.html">ComputeRootStepNode</a> (<a class="el" href="namespacetvm_1_1auto__scheduler.html">tvm::auto_scheduler</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDoc.html">IndexDoc</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConcatenateAttrs.html">ConcatenateAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IndexDocNode.html">IndexDocNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConvert [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Constant.html">Constant</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IndexMap.html">IndexMap</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1PackedFuncValueConverter_3_1_1tvm_1_1runtime_1_1String_01_4.html">PackedFuncValueConver [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1ConstantInfo.html">ConstantInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1IndexMapNode.html">IndexMapNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1micro__rpc_1_1PacketFieldSizeBytes.html">PacketFieldSizeBytes</a> (<a class="el" href="namespacet [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ConstantInfoMetadata.html">ConstantInfoMetadata</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1InitOpAttrs.html">InitOpAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1PadAttrs.htm [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ConstantInfoMetadataNode.html">ConstantInfoMetadataNode</a> (<a class="el" href="namespacetvm_1_1runtime_1_1metadata.html">tvm::runtime::metadata</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1InplaceArrayBase.html">InplaceArrayBase</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_ [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1ConstantInfoNode.html">ConstantInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1relay_1_1InstanceNormAttrs.html">InstanceNormAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassContext.html">PassContext</a> (<a class="el" href="namespacetvm_1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1ConstantMemoryPools.html">ConstantMemoryPools</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1Instruction.html">Instruction</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassContextNode.html">PassContextNode</a> (<a class="el" href="namespacetvm_1_1tran [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1ConstantMemoryPoolsNode.html">ConstantMemoryPoolsNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1runtime_1_1vm_1_1Instruction.html">Instruction</a> (<a class="el" href="namespacetvm_1_1runtime_1_1vm.html">tvm::runtime::vm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassInfo.html">PassInfo</a> (<a class="el" hr [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstantNode.html">ConstantNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1InstructionKind.html">InstructionKind</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassInfoNode.html">PassInfoNode</a> (<a class="el" href="namespa [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstantPattern.html">ConstantPattern</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1InstructionKindNode.html">InstructionKindNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1instrument_1_1PassInstrument.html">PassInstrument</a> (<a class [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstantPatternNode.html">ConstantPatternNode</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1InstructionKindRegEntry.html">InstructionKindRegEntry</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1instrument_1_1PassInstrumentNode.html">PassInst [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1ConstantPoolInfo.html">ConstantPoolInfo</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1InstructionNode.html">InstructionNode</a> (<a class="el" href="namespacetvm_1_1tir.html">tvm::tir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1transform_1_1PassNode.html">PassNode</a> (<a class="el" href="namespacetvm_1_1transform.html"> [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1ConstantPoolInfoNode.html">ConstantPoolInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraints.html">IntConstraints</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1Pattern.html">Pattern</a> (<a class="el" href="namespacetvm_1_1relay.htm [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBound.html">ConstIntBound</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsNode.html">IntConstraintsNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternConstructor.html">PatternConstructor</a> (<a cl [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBoundAnalyzer.html">ConstIntBoundAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransform.html">IntConstraintsTransform</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternConstructorNode.html" [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstIntBoundNode.html">ConstIntBoundNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntConstraintsTransformNode.html">IntConstraintsTransformNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html">Pattern [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1arith_1_1ConstraintContext.html">ConstraintContext</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Integer.html">Integer</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html">PatternFunctor&lt; R( [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="classtvm_1_1Constructor.html">Constructor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1Integer.html">Integer</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html">PatternFunctor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1ConstructorNode.html">ConstructorNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1InterpreterClosure.html">InterpreterClosure</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html">Pat [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstructorValue.html">ConstructorValue</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1InterpreterClosureObj.html">InterpreterClosureObj</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternMutator.html">PatternMutator</a> (< [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConstructorValueObj.html">ConstructorValueObj</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntGroupBounds.html">IntGroupBounds</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternNode.html">PatternNode</a> (<a class="el"  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray_1_1Container.html">NDArray::Container</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntGroupBoundsNode.html">IntGroupBoundsNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternTuple.html">PatternTuple</a>  [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html">NDArray::ContainerBase</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IntImm.html">IntImm</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternTupleNode.html">PatternTupleNode</a> (<a class="el" href="namespacetvm [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1ContextManager.html">ContextManager</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IntImmNode.html">IntImmNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv1DAttrs.html">Conv1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSet.html">IntSet</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternVarNode.html">PatternVarNode</a> (<a class="el" href="namespacetvm_1_1rela [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv1DTransposeAttrs.html">Conv1DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetAnalyzer.html">IntSetAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html">PatternVisitor</a> (<a cla [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DAttrs.html">Conv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetNode.html">IntSetNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html">PatternWildcard</a> (<a class="el" href="namespacet [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DTransposeAttrs.html">Conv2DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilder.html">IRBuilder</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternWildc [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradAttrs.html">Conv2DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html">IRBuilderFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1pr [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradNNPACKWeightTransformAttrs.html">Conv2DWinogradNNPACKWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html">IRBuilderFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign=" [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DAttrs.html">Conv3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderNode.html">IRBuilderNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1PlaceholderOpNode.html">P [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DTransposeAttrs.html">Conv3DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifier.html">IRDocsifier</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1PointerType.html">PointerTyp [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DWinogradAttrs.html">Conv3DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifierNode.html">IRDocsifierNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1PointerTypeNode.html"> [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvGemmWeightTransformAttrs.html">ConvGemmWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IRModule.html">IRModule</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1usmp_1_1PoolAllocation.html">PoolAllocation</a> (<a class="el" href="n [...]
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvWinogradWeightTransformAttrs.html">ConvWinogradWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRModuleFrame.html">IRModuleFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="s [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1Constructor.html">Constructor</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1InterpreterClosure.html">InterpreterClosure</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternMutator.html">PatternMutator</a> (<a class="el" href="namespacetvm_1_1relay. [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1ConstructorNode.html">ConstructorNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1InterpreterClosureObj.html">InterpreterClosureObj</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternNode.html">PatternNode</a> (<a class="el" href="namespacetvm_1 [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1relay_1_1ConstructorValue.html">ConstructorValue</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntGroupBounds.html">IntGroupBounds</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternTuple.html">PatternTuple</a> (<a class="el" href= [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConstructorValueObj.html">ConstructorValueObj</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntGroupBoundsNode.html">IntGroupBoundsNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternTupleNode.html">PatternTupleNode</ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray_1_1Container.html">NDArray::Container</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IntImm.html">IntImm</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm: [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1NDArray_1_1ContainerBase.html">NDArray::ContainerBase</a> (<a class="el" href="namespacetvm_1_1runtime.html">tvm::runtime</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IntImmNode.html">IntImmNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternVarNode.html">PatternVarNode</a> (<a class="el" href="namespac [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1ContextManager.html">ContextManager</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSet.html">IntSet</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html">PatternVisitor</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</ [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv1DAttrs.html">Conv1DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetAnalyzer.html">IntSetAnalyzer</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html">PatternWildcard</a> (<a class="el" href="na [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv1DTransposeAttrs.html">Conv1DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1arith_1_1IntSetNode.html">IntSetNode</a> (<a class="el" href="namespacetvm_1_1arith.html">tvm::arith</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1relay_1_1PatternWildcardNode.html">PatternWildcardNode</a> (<a c [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DAttrs.html">Conv2DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilder.html">IRBuilder</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1runtime_1_1profiling_1_1PercentNode.htm [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DTransposeAttrs.html">Conv2DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html">IRBuilderFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_1Place [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradAttrs.html">Conv2DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html">IRBuilderFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1te_1_ [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradNNPACKWeightTransformAttrs.html">Conv2DWinogradNNPACKWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRBuilderNode.html">IRBuilderNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a cl [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DAttrs.html">Conv3DAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifier.html">IRDocsifier</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1PointerTypeNode.html">PointerTypeNode</a> (<a  [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DTransposeAttrs.html">Conv3DTransposeAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1printer_1_1IRDocsifierNode.html">IRDocsifierNode</a> (<a class="el" href="namespacetvm_1_1script_1_1printer.html">tvm::script::printer</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1tir_1_1usmp_1_1PoolA [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1Conv3DWinogradAttrs.html">Conv3DWinogradAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IRModule.html">IRModule</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1tir_1_1usmp_1_1PoolAllocationNode.html">PoolAllocationNode</a> (<a class="el" href="namespacet [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvGemmWeightTransformAttrs.html">ConvGemmWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1ir_1_1IRModuleFrame.html">IRModuleFrame</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1ir.html">tvm::script::ir_builder::ir</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1ConvWinogradWeightTransformAttrs.html">ConvWinogradWeightTransformAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1ir_1_1IRModuleFrameNode.html">IRModuleFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder_1_1ir.html">tvm::script::ir_builder::ir</a>)&#160;&#160;&#160;</td><td valign="to [...]
 </td></tr>
-<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CorrelationAttrs.html">CorrelationAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRModuleFrameNode.html">IRModuleFrameNode</a> (<a class="el" href="namespacetvm_1_1script_1_1ir__builder.html">tvm::script::ir_builder</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1PoolInfo.html [...]
-<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1CostModel.html">CostModel</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IRModuleNode.html">IRModuleNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1PoolInfoNode.html">PoolInfoNode</a> (<a class="el" href="namespacetvm.html">tvm</ [...]
+<tr><td valign="top"><a class="el" href="structtvm_1_1relay_1_1CorrelationAttrs.html">CorrelationAttrs</a> (<a class="el" href="namespacetvm_1_1relay.html">tvm::relay</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1IRModuleNode.html">IRModuleNode</a> (<a class="el" href="namespacetvm.html">tvm</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="classtvm_1_1PoolInfoProperties.html">PoolInfoProperties</a> (<a class="el" href="namespacetvm.html">tvm</ [...]
+<tr><td valign="top"><a class="el" href="classtvm_1_1meta__schedule_1_1CostModel.html">CostModel</a> (<a class="el" href="namespacetvm_1_1meta__schedule.html">tvm::meta_schedule</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1detail_1_1is__specialized.html">is_specialized</a> (<a class="el" href="namespacetvm_1_1detail.html">tvm::detail</a>)&#160;&#160;&#160;</td><td valign="top"><a class="el" href="structtvm_1_1PoolInfoPropertiesNode.html">PoolInfoProperties [...]
 <tr><td></td><td></td><td></td><td></td><td></td></tr>
 </table>
 <div class="qindex"><a class="qindex" href="#letter_a">a</a>&#160;|&#160;<a class="qindex" href="#letter_b">b</a>&#160;|&#160;<a class="qindex" href="#letter_c">c</a>&#160;|&#160;<a class="qindex" href="#letter_d">d</a>&#160;|&#160;<a class="qindex" href="#letter_e">e</a>&#160;|&#160;<a class="qindex" href="#letter_f">f</a>&#160;|&#160;<a class="qindex" href="#letter_g">g</a>&#160;|&#160;<a class="qindex" href="#letter_h">h</a>&#160;|&#160;<a class="qindex" href="#letter_i">i</a>&#160;|& [...]
diff --git a/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef.html b/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef.html
index 5c7c7b678..af2ec4c30 100644
--- a/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef.html
+++ b/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef.html
@@ -81,7 +81,7 @@ $(function() {
 
 <p><code>#include &lt;<a class="el" href="object_8h_source.html">object.h</a>&gt;</code></p>
 
-<p>Inherited by <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; Range &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; Region &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; T &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; tvm::arith::IterSplitExpr &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt;  [...]
+<p>Inherited by <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; Range &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; Region &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; T &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt; tvm::arith::IterSplitExpr &gt;</a>, <a class="el" href="classtvm_1_1runtime_1_1Array.html">tvm::runtime::Array&lt;  [...]
 <div class="dynheader">
 Collaboration diagram for tvm::runtime::ObjectRef:</div>
 <div class="dyncontent">
diff --git a/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef__coll__graph.svg b/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef__coll__graph.svg
index 698614c7b..04324ea96 100644
--- a/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef__coll__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1runtime_1_1ObjectRef__coll__graph.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 453)">
 <title>tvm::runtime::ObjectRef</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-453 144,-453 144,4 -4,4"/>
-<!-- Node464 -->
+<!-- Node469 -->
 <g id="node1" class="node">
-<title>Node464</title>
+<title>Node469</title>
 <polygon fill="#bfbfbf" stroke="#000000" points="3,-.5 3,-222.5 137,-222.5 137,-.5 3,-.5"/>
 <text text-anchor="middle" x="70" y="-210.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectRef</text>
 <polyline fill="none" stroke="#000000" points="3,-203.5 137,-203.5 "/>
@@ -34,9 +34,9 @@
 <text text-anchor="start" x="11" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># FFIClearAfterMove()</text>
 <text text-anchor="start" x="11" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># GetDataPtr()</text>
 </g>
-<!-- Node465 -->
+<!-- Node470 -->
 <g id="node2" class="node">
-<title>Node465</title>
+<title>Node470</title>
 <g id="a_node2"><a xlink:href="classtvm_1_1runtime_1_1ObjectPtr.html" target="_top" xlink:title="{tvm::runtime::ObjectPtr\l\&lt; tvm::runtime::Object \&gt;\n||+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ObjectPtr()\l+ ~ObjectPtr()\l+ swap()\l+ get()\l+ operator&#45;\&gt;()\land 11 more...\l}">
 <polygon fill="#ffffff" stroke="#000000" points="0,-270.5 0,-448.5 140,-448.5 140,-270.5 0,-270.5"/>
 <text text-anchor="start" x="8" y="-436.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::ObjectPtr</text>
@@ -58,9 +58,9 @@
 </a>
 </g>
 </g>
-<!-- Node465&#45;&gt;Node464 -->
+<!-- Node470&#45;&gt;Node469 -->
 <g id="edge1" class="edge">
-<title>Node465&#45;&gt;Node464</title>
+<title>Node470&#45;&gt;Node469</title>
 <path fill="none" stroke="#404040" d="M70,-270.3167C70,-258.8765 70,-247.0062 70,-235.1402"/>
 <polygon fill="none" stroke="#404040" points="70.0001,-234.7944 66,-228.7944 70,-222.7944 74,-228.7943 70.0001,-234.7944"/>
 <text text-anchor="middle" x="89.5" y="-244" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #data_</text>
diff --git a/docs/reference/api/doxygen/classtvm_1_1runtime_1_1Object__coll__graph.svg b/docs/reference/api/doxygen/classtvm_1_1runtime_1_1Object__coll__graph.svg
index a4ea872e2..eb98b8e84 100644
--- a/docs/reference/api/doxygen/classtvm_1_1runtime_1_1Object__coll__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1runtime_1_1Object__coll__graph.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 392)">
 <title>tvm::runtime::Object</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-392 257,-392 257,4 -4,4"/>
-<!-- Node668 -->
+<!-- Node671 -->
 <g id="node1" class="node">
-<title>Node668</title>
+<title>Node671</title>
 <polygon fill="#bfbfbf" stroke="#000000" points="0,-.5 0,-387.5 183,-387.5 183,-.5 0,-.5"/>
 <text text-anchor="middle" x="91.5" y="-375.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::runtime::Object</text>
 <polyline fill="none" stroke="#000000" points="0,-368.5 183,-368.5 "/>
@@ -49,9 +49,9 @@
 <text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># DecRef()</text>
 <text text-anchor="start" x="8" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"># GetOrAllocRuntimeTypeIndex()</text>
 </g>
-<!-- Node668&#45;&gt;Node668 -->
+<!-- Node671&#45;&gt;Node671 -->
 <g id="edge1" class="edge">
-<title>Node668&#45;&gt;Node668</title>
+<title>Node671&#45;&gt;Node671</title>
 <path fill="none" stroke="#404040" d="M183.3625,-256.0888C194.0482,-244.6299 201,-223.9336 201,-194 201,-171.3159 197.0077,-153.9367 190.4236,-141.8623"/>
 <polygon fill="none" stroke="#404040" points="190.3069,-141.6977 183.5725,-139.1192 183.3625,-131.9112 190.0969,-134.4897 190.3069,-141.6977"/>
 <text text-anchor="middle" x="227" y="-191.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000"> #deleter_</text>
diff --git a/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html b/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html
index c0400c733..8e19da617 100644
--- a/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html
+++ b/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame.html
@@ -78,7 +78,7 @@ $(function() {
 <div class="dynheader">
 Inheritance diagram for tvm::script::ir_builder::IRBuilderFrame:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame__inherit__graph.svg" width="198" height="720"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrame__inherit__graph.svg" width="408" height="904"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <div class="dynheader">
diff --git a/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html b/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html
index a36299adf..c8e325f51 100644
--- a/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html
+++ b/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode.html
@@ -76,7 +76,7 @@ $(function() {
 <div class="dynheader">
 Inheritance diagram for tvm::script::ir_builder::IRBuilderFrameNode:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode__inherit__graph.svg" width="290" height="970"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode__inherit__graph.svg" width="742" height="1256"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 <div class="dynheader">
@@ -311,7 +311,7 @@ Additional Inherited Members</h2></td></tr>
 <p>The method called when exiting RAII scope. </p>
 <dl class="section see"><dt>See also</dt><dd>tvm::support::With </dd></dl>
 
-<p>Reimplemented in <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1IRModuleFrameNode.html#ac2a80e8737ebe63c981662e61918d662">tvm::script::ir_builder::IRModuleFrameNode</a>.</p>
+<p>Reimplemented in <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1AssertFrameNode.html#ad934bf1ff350bb1d57daafbe6588f53e">tvm::script::ir_builder::tir::AssertFrameNode</a>, <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1tir_1_1PrimFuncFrameNode.html#a2b1cd6ddd59ae4a2376f2f1a1815ca43">tvm::script::ir_builder::tir::PrimFuncFrameNode</a>, and <a class="el" href="classtvm_1_1script_1_1ir__builder_1_1ir_1_1IRModuleFrameNode.html#ac8c627c5a3ffa09a22af992f8d7d7922">t [...]
 <dl><dt><b>Examples: </b></dt><dd><a class="el" href="_2workspace_2include_2tvm_2script_2ir_builder_2base_8h-example.html#a7">/workspace/include/tvm/script/ir_builder/base.h</a>.</dd>
 </dl>
 </div>
diff --git a/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode__coll__graph.svg b/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode__coll__graph.svg
index fe7ad8f82..4bd0b60fb 100644
--- a/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode__coll__graph.svg
+++ b/docs/reference/api/doxygen/classtvm_1_1script_1_1ir__builder_1_1IRBuilderFrameNode__coll__graph.svg
@@ -9,9 +9,9 @@
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 564)">
 <title>tvm::script::ir_builder::IRBuilderFrameNode</title>
 <polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-564 269,-564 269,4 -4,4"/>
-<!-- Node3 -->
+<!-- Node6 -->
 <g id="node1" class="node">
-<title>Node3</title>
+<title>Node6</title>
 <polygon fill="#bfbfbf" stroke="#000000" points="0,-.5 0,-134.5 207,-134.5 207,-.5 0,-.5"/>
 <text text-anchor="start" x="8" y="-122.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm::script::ir_builder</text>
 <text text-anchor="middle" x="103.5" y="-111.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">::IRBuilderFrameNode</text>
@@ -26,9 +26,9 @@
 <text text-anchor="start" x="8" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ ExitWithScope()</text>
 <text text-anchor="start" x="8" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">+ AddCallback()</text>
 </g>
-<!-- Node4 -->
... 84808 lines suppressed ...