Posted to commits@tvm.apache.org by tq...@apache.org on 2023/01/18 15:19:17 UTC

[tvm-site] branch asf-site updated: deploying docs (apache/tvm@da99e9d1b5208e9a23e0b8e5b45da6e633f05415)

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/tvm-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new c6fedc8548 deploying docs (apache/tvm@da99e9d1b5208e9a23e0b8e5b45da6e633f05415)
c6fedc8548 is described below

commit c6fedc8548ce22b7c3bcf9a0004e14fc3dcb9f08
Author: tvm-bot <95...@users.noreply.github.com>
AuthorDate: Wed Jan 18 15:19:08 2023 +0000

    deploying docs (apache/tvm@da99e9d1b5208e9a23e0b8e5b45da6e633f05415)
---
 .../how_to/compile_models/from_darknet.rst.txt     |    2 +-
 .../how_to/compile_models/from_keras.rst.txt       |    2 +-
 .../how_to/compile_models/from_mxnet.rst.txt       |    2 +-
 .../how_to/compile_models/from_oneflow.rst.txt     |    2 +-
 .../how_to/compile_models/from_pytorch.rst.txt     |    2 +-
 .../how_to/compile_models/from_tensorflow.rst.txt  |    2 +-
 .../compile_models/sg_execution_times.rst.txt      |   22 +-
 .../deploy_models/deploy_model_on_adreno.rst.txt   |    2 +-
 .../deploy_models/deploy_model_on_android.rst.txt  |    2 +-
 .../deploy_object_detection_pytorch.rst.txt        |    4 +-
 .../deploy_models/deploy_prequantized.rst.txt      |    6 +-
 .../deploy_prequantized_tflite.rst.txt             |    4 +-
 .../how_to/deploy_models/deploy_quantized.rst.txt  |    2 +-
 .../deploy_models/deploy_ssd_gluoncv.rst.txt       |    4 +-
 .../deploy_models/sg_execution_times.rst.txt       |   20 +-
 .../extend_tvm/bring_your_own_datatypes.rst.txt    |    2 +-
 .../how_to/extend_tvm/sg_execution_times.rst.txt   |   10 +-
 .../how_to/extend_tvm/use_pass_instrument.rst.txt  |   16 +-
 .../optimize_operators/opt_conv_cuda.rst.txt       |    2 +-
 .../optimize_operators/opt_conv_tensorcore.rst.txt |    2 +-
 .../how_to/optimize_operators/opt_gemm.rst.txt     |   16 +-
 .../optimize_operators/sg_execution_times.rst.txt  |    8 +-
 .../sg_execution_times.rst.txt                     |   14 +-
 .../tune_conv2d_layer_cuda.rst.txt                 |  995 +-------
 .../tune_network_cuda.rst.txt                      |    4 +-
 .../tune_network_x86.rst.txt                       |    4 +-
 .../tune_sparse_x86.rst.txt                        |   77 +-
 .../tune_with_autotvm/sg_execution_times.rst.txt   |   10 +-
 .../tune_with_autotvm/tune_conv2d_cuda.rst.txt     |  474 ++--
 .../work_with_microtvm/micro_autotune.rst.txt      |   16 +-
 .../work_with_microtvm/micro_pytorch.rst.txt       |    4 +-
 .../how_to/work_with_microtvm/micro_train.rst.txt  |   16 +-
 .../work_with_microtvm/sg_execution_times.rst.txt  |   12 +-
 .../work_with_relay/sg_execution_times.rst.txt     |    8 +-
 .../how_to/work_with_schedules/intrin_math.rst.txt |    2 +-
 .../work_with_schedules/sg_execution_times.rst.txt |   16 +-
 .../how_to/work_with_schedules/tensorize.rst.txt   |    2 +-
 .../tutorials/autotvm/sg_execution_times.rst.txt   |    4 +-
 .../frontend/deploy_classification.rst.txt         |    2 +-
 .../tutorials/frontend/deploy_detection.rst.txt    |    2 +-
 .../tutorials/frontend/sg_execution_times.rst.txt  |    6 +-
 .../tutorials/optimize/sg_execution_times.rst.txt  |    6 +-
 .../topic/vta/tutorials/sg_execution_times.rst.txt |    6 +-
 .../tutorial/auto_scheduler_matmul_x86.rst.txt     |   11 +-
 docs/_sources/tutorial/autotvm_matmul_x86.rst.txt  |   20 +-
 docs/_sources/tutorial/autotvm_relay_x86.rst.txt   |   58 +-
 .../tutorial/cross_compilation_and_rpc.rst.txt     |    2 +-
 docs/_sources/tutorial/intro_topi.rst.txt          |    2 +-
 docs/_sources/tutorial/sg_execution_times.rst.txt  |   22 +-
 .../tutorial/tensor_expr_get_started.rst.txt       |   43 +-
 docs/commit_hash                                   |    2 +-
 docs/genindex.html                                 |   18 +-
 docs/how_to/compile_models/from_darknet.html       |    2 +-
 docs/how_to/compile_models/from_keras.html         |    2 +-
 docs/how_to/compile_models/from_mxnet.html         |    2 +-
 docs/how_to/compile_models/from_oneflow.html       |   15 +-
 docs/how_to/compile_models/from_pytorch.html       |    9 +-
 docs/how_to/compile_models/from_tensorflow.html    |    2 +-
 docs/how_to/compile_models/sg_execution_times.html |   22 +-
 .../deploy_models/deploy_model_on_adreno.html      |    2 +-
 .../deploy_models/deploy_model_on_android.html     |    2 +-
 .../deploy_object_detection_pytorch.html           |   44 +-
 docs/how_to/deploy_models/deploy_prequantized.html |    8 +-
 .../deploy_models/deploy_prequantized_tflite.html  |    4 +-
 docs/how_to/deploy_models/deploy_quantized.html    |    2 +-
 docs/how_to/deploy_models/deploy_ssd_gluoncv.html  |   37 +-
 docs/how_to/deploy_models/sg_execution_times.html  |   20 +-
 .../extend_tvm/bring_your_own_datatypes.html       |    2 +-
 docs/how_to/extend_tvm/sg_execution_times.html     |   10 +-
 docs/how_to/extend_tvm/use_pass_instrument.html    |   18 +-
 docs/how_to/optimize_operators/opt_conv_cuda.html  |    2 +-
 .../optimize_operators/opt_conv_tensorcore.html    |    2 +-
 docs/how_to/optimize_operators/opt_gemm.html       |   16 +-
 .../optimize_operators/sg_execution_times.html     |    8 +-
 .../sg_execution_times.html                        |   14 +-
 .../tune_conv2d_layer_cuda.html                    |  995 +-------
 .../tune_with_autoscheduler/tune_network_cuda.html |    4 +-
 .../tune_with_autoscheduler/tune_network_x86.html  |    4 +-
 .../tune_with_autoscheduler/tune_sparse_x86.html   |   77 +-
 .../tune_with_autotvm/sg_execution_times.html      |   14 +-
 .../how_to/tune_with_autotvm/tune_conv2d_cuda.html |  474 ++--
 docs/how_to/work_with_microtvm/micro_autotune.html |   16 +-
 docs/how_to/work_with_microtvm/micro_pytorch.html  |    4 +-
 docs/how_to/work_with_microtvm/micro_train.html    |   16 +-
 .../work_with_microtvm/sg_execution_times.html     |   12 +-
 .../how_to/work_with_relay/sg_execution_times.html |    8 +-
 docs/how_to/work_with_schedules/intrin_math.html   |    2 +-
 .../work_with_schedules/sg_execution_times.html    |   16 +-
 docs/how_to/work_with_schedules/tensorize.html     |    2 +-
 docs/install/nnpack.html                           |   12 +-
 docs/objects.inv                                   |  Bin 24170 -> 24181 bytes
 docs/reference/api/doxygen/algorithm_8h__incl.svg  |   36 +-
 docs/reference/api/doxygen/algorithms_8h__incl.svg |  100 +-
 docs/reference/api/doxygen/annotated.html          |  708 +++---
 docs/reference/api/doxygen/array_8h.html           |    2 +-
 docs/reference/api/doxygen/array_8h__dep__incl.svg | 1176 +++++----
 .../api/doxygen/auto__schedule_8h__incl.svg        |   36 +-
 .../doxygen/auto__scheduler_2feature_8h__incl.svg  |   32 +-
 docs/reference/api/doxygen/bias__add_8h__incl.svg  |  188 +-
 .../api/doxygen/broadcast_8h__dep__incl.svg        |   92 +-
 docs/reference/api/doxygen/broadcast_8h__incl.svg  |   64 +-
 .../api/doxygen/builder_8h__dep__incl.svg          |   40 +-
 .../api/doxygen/c__runtime__api_8h__dep__incl.svg  |   20 +-
 docs/reference/api/doxygen/classes.html            |   48 +-
 .../classtvm_1_1CompileError__coll__graph.svg      |  130 -
 ...classtvm_1_1relay_1_1CompileError-members.html} |   16 +-
 ...html => classtvm_1_1relay_1_1CompileError.html} |   84 +-
 ...sstvm_1_1relay_1_1CompileError__coll__graph.svg |  130 +
 ...m_1_1relay_1_1CompileError__inherit__graph.svg} |   34 +-
 ...lasstvm_1_1relay_1_1ErrorReporter-members.html} |   18 +-
 ...tml => classtvm_1_1relay_1_1ErrorReporter.html} |   96 +-
 ...tvm_1_1relay_1_1ErrorReporter__coll__graph.svg} |   20 +-
 docs/reference/api/doxygen/codegen_8h__incl.svg    |  116 +-
 .../api/doxygen/compilation__config_8h.html        |    2 +-
 .../doxygen/compilation__config_8h__dep__incl.svg  |   20 +-
 .../api/doxygen/compilation__config_8h__incl.svg   | 1223 +++++----
 .../api/doxygen/compilation__config_8h_source.html |    2 +-
 docs/reference/api/doxygen/constant__utils_8h.html |    2 +-
 .../api/doxygen/constant__utils_8h__dep__incl.svg  |  168 +-
 .../api/doxygen/constant__utils_8h__incl.svg       | 1234 +++++-----
 .../api/doxygen/cuda_2dense_8h__dep__incl.svg      |   12 +-
 .../reference/api/doxygen/cuda_2dense_8h__incl.svg |  480 ++--
 .../api/doxygen/cuda_2injective_8h__dep__incl.svg  |   12 +-
 .../api/doxygen/cuda_2injective_8h__incl.svg       |  444 ++--
 .../api/doxygen/cuda_2pooling_8h__dep__incl.svg    |   12 +-
 .../api/doxygen/cuda_2pooling_8h__incl.svg         |  448 ++--
 .../api/doxygen/cuda_2reduction_8h__dep__incl.svg  |   12 +-
 .../api/doxygen/cuda_2reduction_8h__incl.svg       |  444 ++--
 .../api/doxygen/cuda_2softmax_8h__dep__incl.svg    |   12 +-
 .../api/doxygen/cuda_2softmax_8h__incl.svg         |  444 ++--
 .../api/doxygen/data__type_8h__dep__incl.svg       |  140 +-
 .../api/doxygen/database_8h__dep__incl.svg         |   44 +-
 docs/reference/api/doxygen/database_8h__incl.svg   |   20 +-
 .../api/doxygen/dataflow__matcher_8h__incl.svg     |  204 +-
 .../doxygen/dataflow__pattern_8h__dep__incl.svg    |   24 +-
 .../api/doxygen/dataflow__pattern_8h__incl.svg     |  236 +-
 .../dataflow__pattern__functor_8h__dep__incl.svg   |   12 +-
 .../dataflow__pattern__functor_8h__incl.svg        |  208 +-
 .../doxygen/detail_2broadcast_8h__dep__incl.svg    |  104 +-
 .../api/doxygen/detail_2broadcast_8h__incl.svg     |   48 +-
 docs/reference/api/doxygen/device__copy_8h.html    |    2 +-
 .../api/doxygen/device__copy_8h__incl.svg          | 1619 ++++++------
 docs/reference/api/doxygen/dir_000005_000007.html  |    2 +-
 .../dir_63946bee875c6d52bce55e72a67a86ad.html      |    2 +
 .../dir_dc867ff9a37cad1764f1670dc7eba6c1.html      |    3 -
 docs/reference/api/doxygen/driver__api_8h.html     |    2 +-
 .../reference/api/doxygen/driver__api_8h__incl.svg | 1912 ++++++++-------
 docs/reference/api/doxygen/einsum_8h__incl.svg     |  100 +-
 .../api/doxygen/elemwise_8h__dep__incl.svg         |   48 +-
 docs/reference/api/doxygen/error_8h.html           |   28 +-
 docs/reference/api/doxygen/error_8h__dep__incl.svg |  973 +-------
 docs/reference/api/doxygen/error_8h__incl.svg      |   14 +-
 docs/reference/api/doxygen/error_8h_source.html    |   34 +-
 docs/reference/api/doxygen/files.html              |   56 +-
 docs/reference/api/doxygen/flatten_8h__incl.svg    |   52 +-
 docs/reference/api/doxygen/functions_a.html        |    2 +-
 docs/reference/api/doxygen/functions_c.html        |    4 +-
 docs/reference/api/doxygen/functions_e.html        |    2 +-
 docs/reference/api/doxygen/functions_func_a.html   |    2 +-
 docs/reference/api/doxygen/functions_func_c.html   |    2 +-
 docs/reference/api/doxygen/functions_func_e.html   |    2 +-
 docs/reference/api/doxygen/functions_func_o.html   |    2 +-
 docs/reference/api/doxygen/functions_func_r.html   |    8 +-
 docs/reference/api/doxygen/functions_func_s.html   |    2 +-
 docs/reference/api/doxygen/functions_func_t.html   |    2 +-
 docs/reference/api/doxygen/functions_func_v.html   |    2 +-
 docs/reference/api/doxygen/functions_m.html        |    2 +-
 docs/reference/api/doxygen/functions_o.html        |    2 +-
 docs/reference/api/doxygen/functions_r.html        |    6 +-
 docs/reference/api/doxygen/functions_rela.html     |    2 +-
 docs/reference/api/doxygen/functions_s.html        |    4 +-
 docs/reference/api/doxygen/functions_t.html        |    6 +-
 docs/reference/api/doxygen/functions_u.html        |    2 +-
 docs/reference/api/doxygen/functions_v.html        |    6 +-
 docs/reference/api/doxygen/functions_vars_s.html   |    2 +-
 .../api/doxygen/functor_8h__dep__incl.svg          |   16 +-
 .../api/doxygen/generic_2default_8h__incl.svg      |  444 ++--
 .../api/doxygen/generic_2extern_8h__dep__incl.svg  |   24 +-
 .../api/doxygen/generic_2extern_8h__incl.svg       |  464 ++--
 .../doxygen/generic_2injective_8h__dep__incl.svg   |   32 +-
 .../api/doxygen/generic_2injective_8h__incl.svg    |  444 ++--
 .../api/doxygen/generic__func_8h__dep__incl.svg    |  196 +-
 docs/reference/api/doxygen/hierarchy.html          |   14 +-
 docs/reference/api/doxygen/inherit_graph_12.svg    |   24 +-
 docs/reference/api/doxygen/inherit_graph_62.svg    |   15 +-
 docs/reference/api/doxygen/inherit_graph_63.svg    |   15 +-
 docs/reference/api/doxygen/inherit_graph_64.svg    |    4 +-
 docs/reference/api/doxygen/inherit_graph_65.svg    |   15 +-
 docs/reference/api/doxygen/inherit_graph_66.svg    |   15 +-
 docs/reference/api/doxygen/inherit_graph_67.svg    |   16 +-
 docs/reference/api/doxygen/inherit_graph_68.svg    |   12 +-
 docs/reference/api/doxygen/inherit_graph_69.svg    |   16 +-
 docs/reference/api/doxygen/inherit_graph_70.svg    |   15 +-
 docs/reference/api/doxygen/inherit_graph_71.svg    |   15 +-
 docs/reference/api/doxygen/inherit_graph_72.svg    |   14 +-
 docs/reference/api/doxygen/inherit_graph_73.svg    |   25 +-
 docs/reference/api/doxygen/inherit_graph_74.svg    |   15 +-
 docs/reference/api/doxygen/inherit_graph_75.svg    |   30 +-
 docs/reference/api/doxygen/inherits.html           |   28 +-
 .../api/doxygen/instrument_8h_source.html          |    2 +-
 .../reference/api/doxygen/interpreter_8h__incl.svg |  152 +-
 .../api/doxygen/interpreter_8h_source.html         |    2 +-
 docs/reference/api/doxygen/ir_2adt_8h.html         |    2 +-
 .../api/doxygen/ir_2adt_8h__dep__incl.svg          |  900 ++++---
 docs/reference/api/doxygen/ir_2attrs_8h.html       |    2 +-
 .../api/doxygen/ir_2attrs_8h__dep__incl.svg        |  747 +++---
 .../api/doxygen/ir_2expr_8h__dep__incl.svg         |   28 +-
 docs/reference/api/doxygen/ir_2function_8h.html    |    2 +-
 .../api/doxygen/ir_2function_8h__dep__incl.svg     |  894 ++++---
 docs/reference/api/doxygen/ir_2module_8h.html      |   12 +-
 .../api/doxygen/ir_2module_8h__dep__incl.svg       |  908 ++++---
 .../api/doxygen/ir_2module_8h_source.html          |   21 +-
 docs/reference/api/doxygen/ir_2span_8h.html        |    2 +-
 .../api/doxygen/ir_2span_8h__dep__incl.svg         | 1631 ++++++------
 docs/reference/api/doxygen/ir_2transform_8h.html   |    5 +-
 .../api/doxygen/ir_2transform_8h__dep__incl.svg    |  536 ++--
 .../api/doxygen/ir_2transform_8h__incl.svg         | 1487 ++++++-----
 .../api/doxygen/ir_2transform_8h_source.html       |   73 +-
 docs/reference/api/doxygen/ir_2type_8h.html        |    2 +-
 .../api/doxygen/ir_2type_8h__dep__incl.svg         | 1035 ++++----
 docs/reference/api/doxygen/map_8h.html             |    2 +-
 docs/reference/api/doxygen/map_8h__dep__incl.svg   | 1254 +++++-----
 .../api/doxygen/measure_8h__dep__incl.svg          |   48 +-
 .../doxygen/measure__callback_8h__dep__incl.svg    |   12 +-
 .../api/doxygen/measure__callback_8h__incl.svg     |  348 +--
 .../api/doxygen/memory__pools_8h__dep__incl.svg    |   44 +-
 .../api/doxygen/memory__pools_8h__incl.svg         |   20 +-
 docs/reference/api/doxygen/namespacemembers.html   |    2 +-
 .../api/doxygen/namespacemembers_func.html         |    2 +-
 .../api/doxygen/namespacemembers_func_p.html       |    2 +-
 docs/reference/api/doxygen/namespacemembers_p.html |    2 +-
 docs/reference/api/doxygen/namespacetvm.html       |  553 ++---
 .../api/doxygen/namespacetvm_1_1relay.html         |  155 +-
 docs/reference/api/doxygen/nn_2bnn_8h__incl.svg    |   52 +-
 .../reference/api/doxygen/nn_2pooling_8h__incl.svg |  276 +--
 .../reference/api/doxygen/nn_2softmax_8h__incl.svg |  228 +-
 docs/reference/api/doxygen/node_8h__dep__incl.svg  |   48 +-
 .../reference/api/doxygen/object_8h__dep__incl.svg |  164 +-
 .../api/doxygen/object__path_8h__dep__incl.svg     |   12 +-
 docs/reference/api/doxygen/on__device_8h.html      |    2 +-
 docs/reference/api/doxygen/on__device_8h__incl.svg | 1619 ++++++------
 .../api/doxygen/op__strategy_8h__incl.svg          |  452 ++--
 .../api/doxygen/optional_8h__dep__incl.svg         |   48 +-
 .../api/doxygen/packed__func_8h__dep__incl.svg     |   64 +-
 docs/reference/api/doxygen/parser_8h.html          |    2 +-
 docs/reference/api/doxygen/parser_8h__incl.svg     | 1571 ++++++------
 docs/reference/api/doxygen/parser_8h_source.html   |    2 +-
 .../reference/api/doxygen/pattern__functor_8h.html |    6 +-
 .../api/doxygen/pattern__functor_8h__incl.svg      | 2018 +++++++--------
 .../api/doxygen/pattern__functor_8h_source.html    |    6 +-
 .../api/doxygen/reduction_8h__dep__incl.svg        |   40 +-
 docs/reference/api/doxygen/reduction_8h__incl.svg  |  232 +-
 docs/reference/api/doxygen/registry_8h.html        |    2 +-
 .../api/doxygen/registry_8h__dep__incl.svg         |  675 +++--
 .../api/doxygen/relay_2adt_8h__dep__incl.svg       |   28 +-
 docs/reference/api/doxygen/relay_2adt_8h__incl.svg |   68 +-
 .../api/doxygen/relay_2adt_8h_source.html          |    2 +-
 .../api/doxygen/relay_2analysis_8h__incl.svg       |   92 +-
 .../api/doxygen/relay_2attrs_2memory_8h__incl.svg  |  164 +-
 .../doxygen/relay_2attrs_2memory_8h_source.html    |    2 +-
 .../relay_2attrs_2transform_8h__dep__incl.svg      |   20 +-
 .../doxygen/relay_2attrs_2transform_8h__incl.svg   |   64 +-
 docs/reference/api/doxygen/relay_2base_8h.html     |   12 +-
 .../api/doxygen/relay_2base_8h_source.html         |    5 +-
 .../api/doxygen/relay_2expr_8h__dep__incl.svg      |  200 +-
 .../reference/api/doxygen/relay_2expr_8h__incl.svg |  180 +-
 .../api/doxygen/relay_2expr_8h_source.html         |  161 +-
 .../api/doxygen/relay_2expr__functor_8h.html       |    6 +-
 .../api/doxygen/relay_2expr__functor_8h__incl.svg  | 2590 ++++++++++----------
 .../doxygen/relay_2expr__functor_8h_source.html    |   24 +-
 .../api/doxygen/relay_2feature_8h__incl.svg        |   72 +-
 .../api/doxygen/relay_2function_8h__dep__incl.svg  |   36 +-
 .../api/doxygen/relay_2function_8h__incl.svg       |  148 +-
 .../api/doxygen/relay_2op_8h__dep__incl.svg        |   36 +-
 docs/reference/api/doxygen/relay_2op_8h__incl.svg  |  236 +-
 .../relay_2op__attr__types_8h__dep__incl.svg       |   28 +-
 .../doxygen/relay_2op__attr__types_8h__incl.svg    |  444 ++--
 .../doxygen/relay_2op__attr__types_8h_source.html  |    2 +-
 .../api/doxygen/relay_2qnn_2transform_8h.html      |    2 +-
 .../api/doxygen/relay_2qnn_2transform_8h__incl.svg | 1932 ++++++++-------
 .../doxygen/relay_2qnn_2transform_8h_source.html   |    2 +-
 .../reference/api/doxygen/relay_2transform_8h.html |    2 +-
 .../api/doxygen/relay_2transform_8h__dep__incl.svg |   12 +-
 .../api/doxygen/relay_2transform_8h__incl.svg      | 2113 ++++++++--------
 .../api/doxygen/relay_2transform_8h_source.html    |   16 +-
 docs/reference/api/doxygen/reorg_8h__incl.svg      |  288 +--
 .../api/doxygen/repr__printer_8h__dep__incl.svg    |   48 +-
 .../reference/api/doxygen/rocm_2dense_8h__incl.svg |  496 ++--
 .../api/doxygen/rocm_2injective_8h__incl.svg       |  444 ++--
 .../api/doxygen/rocm_2pooling_8h__incl.svg         |  452 ++--
 .../api/doxygen/rocm_2reduction_8h__incl.svg       |  444 ++--
 .../api/doxygen/rocm_2softmax_8h__incl.svg         |  444 ++--
 .../api/doxygen/runtime_2container_2adt_8h.html    |    2 +-
 .../runtime_2container_2adt_8h__dep__incl.svg      |  850 ++++---
 .../runtime_2container_2base_8h__dep__incl.svg     |  144 +-
 .../api/doxygen/runtime_2memory_8h__dep__incl.svg  |  144 +-
 .../api/doxygen/runtime_2memory_8h_source.html     |    2 +-
 .../api/doxygen/runtime_2module_8h__dep__incl.svg  |   64 +-
 docs/reference/api/doxygen/search/all_10.js        |    4 +-
 docs/reference/api/doxygen/search/all_11.js        |    6 +-
 docs/reference/api/doxygen/search/all_13.js        |    8 +-
 docs/reference/api/doxygen/search/all_14.js        |   18 +-
 docs/reference/api/doxygen/search/all_15.js        |   16 +-
 docs/reference/api/doxygen/search/all_16.js        |    2 +-
 docs/reference/api/doxygen/search/all_17.js        |    4 +-
 docs/reference/api/doxygen/search/all_18.js        |    2 +-
 docs/reference/api/doxygen/search/all_2.js         |    4 +-
 docs/reference/api/doxygen/search/all_4.js         |    2 +-
 docs/reference/api/doxygen/search/all_6.js         |    4 +-
 docs/reference/api/doxygen/search/all_a.js         |    2 +-
 docs/reference/api/doxygen/search/all_d.js         |    2 +-
 docs/reference/api/doxygen/search/all_e.js         |    4 +-
 docs/reference/api/doxygen/search/classes_10.js    |    4 +-
 docs/reference/api/doxygen/search/classes_11.js    |   10 +-
 docs/reference/api/doxygen/search/classes_13.js    |    2 +-
 docs/reference/api/doxygen/search/classes_2.js     |    2 +-
 docs/reference/api/doxygen/search/classes_4.js     |    4 +-
 docs/reference/api/doxygen/search/classes_8.js     |    2 +-
 docs/reference/api/doxygen/search/classes_9.js     |    2 +-
 docs/reference/api/doxygen/search/classes_d.js     |    2 +-
 docs/reference/api/doxygen/search/functions_1.js   |    4 +-
 docs/reference/api/doxygen/search/functions_10.js  |    2 +-
 docs/reference/api/doxygen/search/functions_12.js  |    8 +-
 docs/reference/api/doxygen/search/functions_13.js  |    6 +-
 docs/reference/api/doxygen/search/functions_14.js  |    2 +-
 docs/reference/api/doxygen/search/functions_16.js  |    2 +-
 docs/reference/api/doxygen/search/functions_17.js  |    2 +-
 docs/reference/api/doxygen/search/functions_3.js   |    2 +-
 docs/reference/api/doxygen/search/functions_5.js   |    2 +-
 docs/reference/api/doxygen/search/functions_d.js   |    4 +-
 docs/reference/api/doxygen/search/functions_f.js   |    2 +-
 docs/reference/api/doxygen/search/related_3.js     |    2 +-
 docs/reference/api/doxygen/search/variables_11.js  |    2 +-
 .../api/doxygen/search__policy_8h__dep__incl.svg   |   12 +-
 .../api/doxygen/search__strategy_8h__dep__incl.svg |   36 +-
 .../api/doxygen/search__task_8h__dep__incl.svg     |   60 +-
 docs/reference/api/doxygen/source__map_8h.html     |    2 +-
 .../api/doxygen/source__map_8h__dep__incl.svg      |  900 ++++---
 .../api/doxygen/space__generator_8h__dep__incl.svg |   32 +-
 .../api/doxygen/strided__slice_8h__dep__incl.svg   |   72 +-
 .../reference/api/doxygen/string_8h__dep__incl.svg |   60 +-
 ...tructtvm_1_1relay_1_1ErrorBuilder-members.html} |   10 +-
 ...tml => structtvm_1_1relay_1_1ErrorBuilder.html} |   36 +-
 ...ttvm_1_1relay_1_1ErrorBuilder__coll__graph.svg} |   20 +-
 .../doxygen/structural__equal_8h__dep__incl.svg    |   12 +-
 .../api/doxygen/structural__hash_8h__dep__incl.svg |   12 +-
 .../reference/api/doxygen/target_8h__dep__incl.svg |  504 ++--
 docs/reference/api/doxygen/target_8h__incl.svg     |   20 +-
 docs/reference/api/doxygen/target__kind_8h.html    |    2 +-
 .../api/doxygen/target__kind_8h__dep__incl.svg     |  504 ++--
 .../api/doxygen/target__kind_8h__incl.svg          | 1624 ++++++------
 .../api/doxygen/target__kind_8h_source.html        |    2 +-
 .../api/doxygen/task__scheduler_8h__incl.svg       |  360 +--
 docs/reference/api/doxygen/tir_2analysis_8h.html   |    2 +-
 .../api/doxygen/tir_2analysis_8h__dep__incl.svg    |  176 +-
 .../api/doxygen/tir_2analysis_8h__incl.svg         | 1657 ++++++-------
 .../api/doxygen/tir_2analysis_8h_source.html       |    2 +-
 docs/reference/api/doxygen/tir_2transform_8h.html  |    2 +-
 .../api/doxygen/tir_2transform_8h__incl.svg        | 1706 +++++++------
 .../api/doxygen/tir_2usmp_2analysis_8h__incl.svg   |   24 +-
 .../api/doxygen/tir_2usmp_2transform_8h__incl.svg  |  100 +-
 .../doxygen/tir_2usmp_2transform_8h_source.html    |    2 +-
 .../api/doxygen/tir_2usmp_2utils_8h__dep__incl.svg |   36 +-
 .../api/doxygen/tir_2usmp_2utils_8h__incl.svg      |  100 +-
 .../api/doxygen/topi_2nn_8h__dep__incl.svg         |   12 +-
 docs/reference/api/doxygen/topi_2nn_8h__incl.svg   |  288 +--
 .../api/doxygen/topi_2transform_8h__dep__incl.svg  |   64 +-
 .../api/doxygen/topi_2transform_8h__incl.svg       |  100 +-
 .../api/doxygen/tune__context_8h__dep__incl.svg    |   24 +-
 .../api/doxygen/tune__context_8h__incl.svg         |  388 +--
 docs/reference/api/doxygen/virtual__device_8h.html |    2 +-
 .../api/doxygen/virtual__device_8h__dep__incl.svg  |  244 +-
 .../api/doxygen/virtual__device_8h__incl.svg       | 1512 ++++++------
 docs/reference/api/doxygen/x86_2bnn_8h__incl.svg   |  440 ++--
 .../api/doxygen/x86_2default_8h__incl.svg          |  444 ++--
 .../api/doxygen/x86_2injective_8h__incl.svg        |  440 ++--
 docs/reference/api/python/auto_scheduler.html      |    4 +-
 docs/reference/api/python/ir.html                  |  179 +-
 .../api/python/relay/dataflow_pattern.html         |   84 +-
 docs/reference/api/python/relay/index.html         |   77 +
 docs/reference/api/python/tir.html                 |   33 +-
 .../api/typedoc/classes/bytestreamreader.html      |   12 +-
 .../api/typedoc/classes/cachedcallstack.html       |   34 +-
 docs/reference/api/typedoc/classes/dldatatype.html |   12 +-
 docs/reference/api/typedoc/classes/dldevice.html   |   10 +-
 .../reference/api/typedoc/classes/environment.html |   12 +-
 docs/reference/api/typedoc/classes/ffilibrary.html |   20 +-
 .../api/typedoc/classes/graphexecutor.html         |   16 +-
 docs/reference/api/typedoc/classes/instance.html   |   40 +-
 docs/reference/api/typedoc/classes/memory.html     |   34 +-
 docs/reference/api/typedoc/classes/module.html     |   10 +-
 docs/reference/api/typedoc/classes/ndarray.html    |   22 +-
 .../api/typedoc/classes/packedfunccell.html        |    6 +-
 docs/reference/api/typedoc/classes/rpcserver.html  |   14 +-
 docs/reference/api/typedoc/classes/scalar.html     |    6 +-
 .../api/typedoc/classes/webgpucontext.html         |   12 +-
 docs/reference/api/typedoc/enums/argtypecode.html  |   30 +-
 .../api/typedoc/enums/aynccallbackcode.html        |    4 +-
 .../api/typedoc/enums/dldatatypecode.html          |    8 +-
 .../api/typedoc/enums/rpcserverstate.html          |   12 +-
 docs/reference/api/typedoc/enums/sizeof.html       |   18 +-
 docs/reference/api/typedoc/index.html              |  112 +-
 .../api/typedoc/interfaces/disposable.html         |    2 +-
 .../api/typedoc/interfaces/functioninfo.html       |    6 +-
 .../api/typedoc/interfaces/libraryprovider.html    |    4 +-
 docs/searchindex.js                                |    2 +-
 .../vta/tutorials/autotvm/sg_execution_times.html  |    4 +-
 .../tutorials/frontend/deploy_classification.html  |    2 +-
 .../vta/tutorials/frontend/deploy_detection.html   |    2 +-
 .../vta/tutorials/frontend/sg_execution_times.html |    6 +-
 .../vta/tutorials/optimize/sg_execution_times.html |    6 +-
 docs/topic/vta/tutorials/sg_execution_times.html   |    6 +-
 docs/tutorial/auto_scheduler_matmul_x86.html       |    7 +-
 docs/tutorial/autotvm_matmul_x86.html              |   20 +-
 docs/tutorial/autotvm_relay_x86.html               |  284 +--
 docs/tutorial/cross_compilation_and_rpc.html       |    2 +-
 docs/tutorial/intro_topi.html                      |    2 +-
 docs/tutorial/relay_quick_start.html               |    2 +-
 docs/tutorial/sg_execution_times.html              |   26 +-
 docs/tutorial/tensor_expr_get_started.html         |   39 +-
 420 files changed, 30637 insertions(+), 33631 deletions(-)

diff --git a/docs/_sources/how_to/compile_models/from_darknet.rst.txt b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
index 21c072813d..18bead6d8b 100644
--- a/docs/_sources/how_to/compile_models/from_darknet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_darknet.rst.txt
@@ -318,7 +318,7 @@ The process is no different from other examples.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  14.880 seconds)
+   **Total running time of the script:** ( 1 minutes  16.940 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_darknet.py:
diff --git a/docs/_sources/how_to/compile_models/from_keras.rst.txt b/docs/_sources/how_to/compile_models/from_keras.rst.txt
index 0a0d472cfc..9705b23dc4 100644
--- a/docs/_sources/how_to/compile_models/from_keras.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_keras.rst.txt
@@ -232,7 +232,7 @@ Look up prediction top 1 index in 1000 class synset.
  .. code-block:: none
 
     Relay top-1 id: 285, class name: Egyptian cat
-
    1/1 [==============================] - ETA: 0s
    1/1 [==============================] - 1s 880ms/step
+
    1/1 [==============================] - ETA: 0s
    1/1 [==============================] - 1s 946ms/step
     Keras top-1 id: 285, class name: Egyptian cat
 
 
diff --git a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
index e25bdd0c81..5aac0b9b18 100644
--- a/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_mxnet.rst.txt
@@ -116,7 +116,7 @@ In this section, we download a pretrained imagenet model and classify an image.
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip362a85cc-5fe7-4587-9c52-f98a850c7209 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+    Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipb42336e4-438b-476c-8eca-027629a47300 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
     x (1, 3, 224, 224)
 
 
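For reference, the resnet18_v1 download logged above comes from Gluon's model
zoo; a minimal sketch of the step that triggers it (the variable name ``block``
is a placeholder, and the tutorial's surrounding code is not reproduced here):

.. code-block:: python

    # Minimal sketch: fetching the pretrained ResNet-18 from the Gluon
    # model zoo, which produces the download message shown in this hunk.
    from mxnet.gluon.model_zoo import vision

    block = vision.get_model("resnet18_v1", pretrained=True)
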
diff --git a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
index f546bb71cf..8ba8bdc7bb 100644
--- a/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_oneflow.rst.txt
@@ -121,7 +121,7 @@ Load a pretrained OneFlow model and save model
  .. code-block:: none
 
     Downloading: "https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip" to /workspace/.oneflow/flowvision_cache/resnet18.zip
-
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     19%|#9        | 7.99M/41.5M [00:00<00:00, 44.4MB/s]
     35%|###4      | 14.3M/41.5M [00:00<00:00, 53.6MB/s]
     54%|#####3    | 22.3M/41.5M [00:00<00:00, 54.6MB/s]
     67%|######6   | 27.7M/41.5M [00:00<00:00, 52.9MB/s]
     82%|########2 | 34.1M/41.5M [00:00<00:00, 55.9MB/s]
     96%|#########6| 40.0M/41.5M [00:00<00:00, 55.6MB/s]
    100%|##########| 41.5M/41.5M [00:00<00:00, 54.8MB/s]
+
      0%|          | 0.00/41.5M [00:00<?, ?B/s]
     15%|#5        | 6.33M/41.5M [00:00<00:00, 47.7MB/s]
     26%|##6       | 10.9M/41.5M [00:00<00:00, 47.6MB/s]
     39%|###8      | 16.0M/41.5M [00:00<00:00, 49.0MB/s]
     54%|#####3    | 22.3M/41.5M [00:00<00:00, 41.5MB/s]
     64%|######3   | 26.5M/41.5M [00:00<00:00, 37.5MB/s]
     77%|#######7  | 32.0M/41.5M [00:00<00:00, 41.5MB/s]
     92%|#########2| 38.3M/41.5M [00:00<00:00, 38.8MB/s]
    100%|##########| 41.5M/41.5M [00:01<00:00, 42.1MB/s]
 
 
 
diff --git a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
index 0db8af73f5..29f0f0c1b2 100644
--- a/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_pytorch.rst.txt
@@ -101,7 +101,7 @@ Load a pretrained PyTorch model
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=ResNet18_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet18_Weights.DEFAULT` to get the most up-to-date weights.
       warnings.warn(msg)
     Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
-
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     30%|###       | 13.6M/44.7M [00:00<00:00, 142MB/s]
     65%|######4   | 28.9M/44.7M [00:00<00:00, 153MB/s]
     97%|#########7| 43.4M/44.7M [00:00<00:00, 118MB/s]
    100%|##########| 44.7M/44.7M [00:00<00:00, 119MB/s]
+
      0%|          | 0.00/44.7M [00:00<?, ?B/s]
     14%|#4        | 6.30M/44.7M [00:00<00:00, 62.9MB/s]
     28%|##7       | 12.3M/44.7M [00:00<00:00, 56.2MB/s]
     56%|#####5    | 24.9M/44.7M [00:00<00:00, 88.4MB/s]
     88%|########7 | 39.3M/44.7M [00:00<00:00, 112MB/s] 
    100%|##########| 44.7M/44.7M [00:00<00:00, 96.2MB/s]
 
 
 
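The torchvision download above belongs to the tutorial's load-and-trace step;
roughly, it follows a pattern like this sketch (the ``weights`` string matches
the deprecation warning quoted in the diff):

.. code-block:: python

    # Sketch of the load-and-trace step behind the download log above.
    # "IMAGENET1K_V1" is the string form of ResNet18_Weights.IMAGENET1K_V1.
    import torch
    import torchvision

    model = torchvision.models.resnet18(weights="IMAGENET1K_V1").eval()

    # Trace with a dummy ImageNet-shaped input so TVM can import the graph.
    input_data = torch.randn(1, 3, 224, 224)
    scripted_model = torch.jit.trace(model, input_data).eval()
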
diff --git a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
index 560ec2a99f..f70c3d3e14 100644
--- a/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
+++ b/docs/_sources/how_to/compile_models/from_tensorflow.rst.txt
@@ -424,7 +424,7 @@ Run the corresponding model on tensorflow
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  17.877 seconds)
+   **Total running time of the script:** ( 1 minutes  21.156 seconds)
 
 
 .. _sphx_glr_download_how_to_compile_models_from_tensorflow.py:
diff --git a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
index aa4a6bff6c..85087c5cdb 100644
--- a/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/compile_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**06:07.783** total execution time for **how_to_compile_models** files:
+**06:21.358** total execution time for **how_to_compile_models** files:
 
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:17.877 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tensorflow.py` (``from_tensorflow.py``) | 01:21.156 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:14.880 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_darknet.py` (``from_darknet.py``)       | 01:16.940 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:50.409 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_paddle.py` (``from_paddle.py``)         | 00:52.745 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:33.733 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_oneflow.py` (``from_oneflow.py``)       | 00:35.607 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:29.568 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_mxnet.py` (``from_mxnet.py``)           | 00:30.318 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:29.300 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_coreml.py` (``from_coreml.py``)         | 00:30.047 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:26.765 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_tflite.py` (``from_tflite.py``)         | 00:27.172 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:23.543 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_pytorch.py` (``from_pytorch.py``)       | 00:24.515 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:19.171 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_keras.py` (``from_keras.py``)           | 00:20.262 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.536 | 0.0 MB |
+| :ref:`sphx_glr_how_to_compile_models_from_onnx.py` (``from_onnx.py``)             | 00:02.597 | 0.0 MB |
 +-----------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt
index 1d71e11f24..39282b83ba 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_adreno.rst.txt
@@ -727,7 +727,7 @@ well as provides information about the model's performance
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-     2685.0209    2684.0255    2688.4638    2683.0311      1.9015   
+     2544.0319    2543.2676    2549.7816    2542.3935      2.0287   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
index dfa7053387..8e043c1597 100644
--- a/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_model_on_android.rst.txt
@@ -437,7 +437,7 @@ Execute on TVM
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      15.5922      15.5360      15.8489      15.4871       0.1193   
+      16.3474      16.5616      16.7901      15.5248       0.4458   
                
 
 
diff --git a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
index 8ab1bbf3b0..3df572957f 100644
--- a/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_object_detection_pytorch.rst.txt
@@ -130,7 +130,7 @@ Load pre-trained maskrcnn from torchvision and do tracing
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=MaskRCNN_ResNet50_FPN_Weights.COCO_V1`. You can also use `weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT` to get the most up-to-date weights.
       warnings.warn(msg)
     Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
-
      0%|          | 0.00/170M [00:00<?, ?B/s]
      5%|4         | 7.99M/170M [00:00<00:02, 64.5MB/s]
      9%|9         | 16.0M/170M [00:00<00:02, 69.0MB/s]
     14%|#4        | 24.0M/170M [00:00<00:02, 71.5MB/s]
     19%|#8        | 32.0M/170M [00:00<00:01, 75.2MB/s]
     24%|##3       | 40.0M/170M [00:00<00:01, 71.0MB/s]
     28%|##8       | 48.0M/170M [00:00<00:01, 74.5MB/s]
     35%|###4      | 59.3M/170M [00:00<00:01, 87.9MB/s]
     41%|####1     | 70.0M/170M [00:00<00:01, 95.2MB/s]
     47%|####7     | 80.0M/170M [00:01<00:01, 90.9MB/s]
     52%|#####2    | 88.8M/170M [00:01<00:01, 62.6MB/s]
     57%|#####6    | 96.0M/170M [00:01<00:01, 64.9MB/s]
     62%|######1   | 104M/170M [00:01<00:00, 70.6MB/s] 
     69%|######8   | 116M/170M [00:01<00:00, 84.3MB/s]
     74%|#######3  | 125M/170M [00:01<00:00, 59.1MB/s]
     78%|#######7  | 132M/170M [00:02<00:00, 57.3MB/s]
     85%|########4 | 144M/170M [00:02<00:00, 64.4MB/s]
     89%|########9 | 152M/170M [00:02<00:00, 60.5MB/s]
    96%|#########5| 163M/170M [00:02<00:00, 71.2MB/s]
    100%|##########| 170M/170M [00:02<00:00, 71.5MB/s]
+
      0%|          | 0.00/170M [00:00<?, ?B/s]
      5%|4         | 8.00M/170M [00:00<00:02, 63.5MB/s]
     11%|#         | 18.2M/170M [00:00<00:01, 86.2MB/s]
     16%|#5        | 26.7M/170M [00:00<00:02, 52.7MB/s]
     19%|#9        | 32.9M/170M [00:00<00:02, 48.7MB/s]
     24%|##3       | 40.4M/170M [00:00<00:02, 56.3MB/s]
     28%|##8       | 48.4M/170M [00:00<00:02, 63.4MB/s]
     33%|###2      | 56.0M/170M [00:00<00:02, 59.1MB/s]
     38%|###7      | 64.0M/170M [00:01<00:02, 53.9MB/s]
     42%|####2     | 72.0M/170M [00:01<00:01, 59.1MB/s]
     47%|####7     | 80.0M/170M [00:01<00:01, 60.7MB/s]
     52%|#####1    | 88.0M/170M [00:01<00:01, 56.2MB/s]
     57%|#####6    | 96.0M/170M [00:01<00:01, 60.4MB/s]
     61%|######1   | 104M/170M [00:01<00:01, 60.7MB/s] 
     66%|######6   | 112M/170M [00:01<00:00, 66.8MB/s]
     71%|#######   | 120M/170M [00:02<00:00, 70.2MB/s]
     75%|#######4  | 127M/170M [00:02<00:00, 61.5MB/s]
     78%|#######8  | 133M/170M [00:02<00:00, 45.1MB/s]
     84%|########3 | 142M/170M [00:02<00:00, 55.0MB/s]
     87%|########7 | 148M/170M [00:02<00:00, 52.1MB/s]
     91%|######### | 154M/170M [00:02<00:00, 49.0MB/s]
     94%|#########4| 160M/170M [00:02<00:00, 50.0MB/s]
     98%|#########7| 166M/170M [00:03<00:00, 51.3MB/s]
    100%|##########| 170M/170M [00:03<00:00, 56.6MB/s]
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torch/nn/functional.py:3897: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
       for i in range(dim)
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/detection/anchor_utils.py:124: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
@@ -299,7 +299,7 @@ Get boxes with score larger than 0.9
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  22.255 seconds)
+   **Total running time of the script:** ( 3 minutes  27.860 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_object_detection_pytorch.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
index 90892d21d4..22b9aad95b 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized.rst.txt
@@ -227,7 +227,7 @@ training. Other models require a full post training calibration.
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and will be removed in 0.15. The current behavior is equivalent to passing `weights=MobileNet_V2_Weights.IMAGENET1K_V1`. You can also use `weights=MobileNet_V2_Weights.DEFAULT` to get the most up-to-date weights.
       warnings.warn(msg)
     Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
-
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     59%|#####8    | 7.99M/13.6M [00:00<00:00, 50.9MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 53.9MB/s]
+
      0%|          | 0.00/13.6M [00:00<?, ?B/s]
     59%|#####8    | 7.99M/13.6M [00:00<00:00, 60.1MB/s]
    100%|##########| 13.6M/13.6M [00:00<00:00, 62.2MB/s]
 
 
 
@@ -409,7 +409,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      90.1450      90.0606      94.1685      89.8995       0.4423   
+      90.5382      90.5035      92.3326      90.2597       0.2281   
                
 
 
@@ -458,7 +458,7 @@ TODO
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  11.751 seconds)
+   **Total running time of the script:** ( 1 minutes  13.060 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized.py:
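
The execution-time summaries in this file come from TVM's ``time_evaluator``;
a minimal sketch of that measurement, assuming a built Relay module ``lib``
and a device handle ``dev`` (both names are placeholders, not part of this
commit):

.. code-block:: python

    # Minimal sketch of the measurement behind the summary tables above,
    # assuming `lib` (output of relay.build) and `dev` (a tvm.device).
    import numpy as np
    from tvm.contrib import graph_executor

    module = graph_executor.GraphModule(lib["default"](dev))
    ftimer = module.module.time_evaluator("run", dev, number=5, repeat=30)
    prof_res = np.array(ftimer().results) * 1e3  # seconds -> milliseconds
    print("mean %.4f ms, std %.4f ms" % (np.mean(prof_res), np.std(prof_res)))
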
diff --git a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
index 1fb4f632e0..256cfa9012 100644
--- a/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_prequantized_tflite.rst.txt
@@ -423,7 +423,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      118.1641     118.2210     119.2519     117.1100      0.4611   
+      121.2507     121.2005     125.3829     120.1212      0.5733   
                
 
 
@@ -460,7 +460,7 @@ Here we give an example of how to measure performance of TVM compiled models.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 2 minutes  36.709 seconds)
+   **Total running time of the script:** ( 2 minutes  33.738 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_prequantized_tflite.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
index 9cb9fd5e12..98fbcfd631 100644
--- a/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_quantized.rst.txt
@@ -257,7 +257,7 @@ We create a Relay VM to build and execute the model.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  42.416 seconds)
+   **Total running time of the script:** ( 1 minutes  35.538 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_quantized.py:
diff --git a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
index 3d3e1e7c4b..f3b4c312ee 100644
--- a/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
+++ b/docs/_sources/how_to/deploy_models/deploy_ssd_gluoncv.rst.txt
@@ -170,7 +170,7 @@ Convert and compile model for CPU.
             data: None
       input_sym_arg_type = in_param.infer_type()[0]
     Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
-
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|4         | 5640/132723 [00:00<00:02, 56397.90KB/s]
     10%|9         | 12997/132723 [00:00<00:01, 66494.82KB/s]
     16%|#5        | 20598/132723 [00:00<00:01, 70834.49KB/s]
     21%|##1       | 28475/132723 [00:00<00:01, 73963.67KB/s]
     27%|##7       | 36366/132723 [00:00<00:01, 75715.09KB/s]
     33%|###3      | 44179/132723 [00:00<00:01, 76534.74KB/s]
     39%|###9      | 51985/132723 [00:00<00:01, 77030.29KB/s]
     45%|####5     | 59828/132723 [00:00<00:00, 77473.83KB/s]
     51%|#####     | 67646/132723 [00:00<00:00, 77692.56KB/s]
     57%|#####6    | 75558/132723 [00:01<00:00, 78131.71KB/s]
     63%|######2   | 83372/132723 [00:01<00:00, 77757.85KB/s]
     69%|######8   | 91186/132723 [00:01<00:00, 77866.92KB/s]
     75%|#######4  | 99068/132723 [00:01<00:00, 78153.94KB/s]
     81%|########  | 106946/132723 [00:01<00:00, 78337.39KB/s]
     86%|########6 | 114780/132723 [00:01<00:00, 78271.51KB/s]
    92%|#########2| 122672/132723 [00:01<00:00, 78465.54KB/s]
     98%|#########8| 130574/132723 [00:01<00:00, 78630.60KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 76781.09KB/s]
+
      0%|          | 0/132723 [00:00<?, ?KB/s]
      4%|3         | 5205/132723 [00:00<00:02, 52016.81KB/s]
     10%|9         | 13190/132723 [00:00<00:01, 68381.47KB/s]
     16%|#5        | 21199/132723 [00:00<00:01, 73721.89KB/s]
     22%|##2       | 29223/132723 [00:00<00:01, 76292.89KB/s]
     28%|##8       | 37243/132723 [00:00<00:01, 77698.35KB/s]
     34%|###4      | 45287/132723 [00:00<00:01, 78624.08KB/s]
     40%|####      | 53337/132723 [00:00<00:01, 79232.31KB/s]
     46%|####6     | 61384/132723 [00:00<00:00, 79623.46KB/s]
     52%|#####2    | 69385/132723 [00:00<00:00, 79740.27KB/s]
     58%|#####8    | 77484/132723 [00:01<00:00, 80123.65KB/s]
     64%|######4   | 85573/132723 [00:01<00:00, 80354.62KB/s]
     71%|#######   | 93657/132723 [00:01<00:00, 80485.65KB/s]
     77%|#######6  | 101742/132723 [00:01<00:00, 80593.11KB/s]
     83%|########2 | 109802/132723 [00:01<00:00, 80553.98KB/s]
     89%|########8 | 117858/132723 [00:01<00:00, 80473.85KB/s]
    95%|#########4| 125906/132723 [00:01<00:00, 80194.88KB/s]
    100%|##########| 132723/132723 [00:01<00:00, 78667.62KB/s]
 
 
 
@@ -246,7 +246,7 @@ Display result
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 3 minutes  25.637 seconds)
+   **Total running time of the script:** ( 3 minutes  29.501 seconds)
 
 
 .. _sphx_glr_download_how_to_deploy_models_deploy_ssd_gluoncv.py:
diff --git a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
index 4de9a86a04..5ad8a637e6 100644
--- a/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/deploy_models/sg_execution_times.rst.txt
@@ -5,26 +5,26 @@
 
 Computation times
 =================
-**14:45.499** total execution time for **how_to_deploy_models** files:
+**14:47.952** total execution time for **how_to_deploy_models** files:
 
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 03:25.637 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_ssd_gluoncv.py` (``deploy_ssd_gluoncv.py``)                           | 03:29.501 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:22.255 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_object_detection_pytorch.py` (``deploy_object_detection_pytorch.py``) | 03:27.860 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 02:36.709 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized_tflite.py` (``deploy_prequantized_tflite.py``)           | 02:33.738 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:42.416 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_quantized.py` (``deploy_quantized.py``)                               | 01:35.538 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:11.751 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_prequantized.py` (``deploy_prequantized.py``)                         | 01:13.060 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_adreno.py` (``deploy_model_on_adreno.py``)                   | 00:54.388 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_adreno.py` (``deploy_model_on_adreno.py``)                   | 00:53.267 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:39.185 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_android.py` (``deploy_model_on_android.py``)                 | 00:40.132 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_nano.py` (``deploy_model_on_nano.py``)                       | 00:26.705 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_nano.py` (``deploy_model_on_nano.py``)                       | 00:27.693 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:26.447 | 0.0 MB |
+| :ref:`sphx_glr_how_to_deploy_models_deploy_model_on_rasp.py` (``deploy_model_on_rasp.py``)                       | 00:27.158 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_deploy_models_deploy_sparse.py` (``deploy_sparse.py``)                                     | 00:00.006 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
index a5fac33eea..c42d08d79f 100644
--- a/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/bring_your_own_datatypes.rst.txt
@@ -463,7 +463,7 @@ First let us define two helper functions to get the mobilenet model and a cat im
 
  .. code-block:: none
 
-    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipb9b11189-fcad-489c-b4dc-d59850bff597 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+    Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip07ed05b1-4d90-462b-a58b-b6a364e3366a from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 
 
 
diff --git a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
index fca367e89c..9c6ae9e55b 100644
--- a/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:51.322** total execution time for **how_to_extend_tvm** files:
+**00:53.032** total execution time for **how_to_extend_tvm** files:
 
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:47.694 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_bring_your_own_datatypes.py` (``bring_your_own_datatypes.py``) | 00:49.186 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.577 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_instrument.py` (``use_pass_instrument.py``)           | 00:02.741 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:01.044 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_use_pass_infra.py` (``use_pass_infra.py``)                     | 00:01.097 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.007 | 0.0 MB |
+| :ref:`sphx_glr_how_to_extend_tvm_low_level_custom_pass.py` (``low_level_custom_pass.py``)       | 00:00.008 | 0.0 MB |
 +-------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
index dff3a01f3b..ed78915f9b 100644
--- a/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
+++ b/docs/_sources/how_to/extend_tvm/use_pass_instrument.rst.txt
@@ -220,10 +220,10 @@ profile the execution time of each pass.
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 17965us [17965us] (48.54%; 48.54%)
-    FoldScaleAxis: 19042us [6us] (51.46%; 51.46%)
-            FoldConstant: 19035us [1705us] (51.44%; 99.97%)
-                    InferType: 17330us [17330us] (46.83%; 91.04%)
+    InferType: 19948us [19948us] (50.28%; 50.28%)
+    FoldScaleAxis: 19724us [10us] (49.72%; 49.72%)
+            FoldConstant: 19714us [1811us] (49.69%; 99.95%)
+                    InferType: 17903us [17903us] (45.13%; 90.81%)

 
 
 
@@ -262,10 +262,10 @@ Refer to the following sections and :py:func:`tvm.instrument.pass_instrument` for th
  .. code-block:: none
 
     Printing results of timing profile...
-    InferType: 17388us [17388us] (47.56%; 47.56%)
-    FoldScaleAxis: 19173us [5us] (52.44%; 52.44%)
-            FoldConstant: 19168us [1688us] (52.43%; 99.98%)
-                    InferType: 17481us [17481us] (47.81%; 91.20%)
+    InferType: 18155us [18155us] (48.05%; 48.05%)
+    FoldScaleAxis: 19625us [7us] (51.95%; 51.95%)
+            FoldConstant: 19618us [1787us] (51.93%; 99.96%)
+                    InferType: 17831us [17831us] (47.20%; 90.89%)
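
The two profiles above come from TVM's ``PassTimingInstrument``. A minimal sketch of how such a table is produced; the tiny conv2d module below is only a stand-in for the larger Relay program the tutorial actually instruments:

.. code-block:: python

    import tvm
    from tvm import relay

    # Tiny stand-in module; any Relay program works for timing purposes.
    x = relay.var("x", shape=(1, 3, 224, 224))
    w = relay.var("w", shape=(16, 3, 3, 3))
    mod = tvm.IRModule.from_expr(
        relay.Function([x, w], relay.nn.conv2d(x, w, padding=(1, 1)))
    )

    timing_inst = tvm.ir.instrument.PassTimingInstrument()
    with tvm.transform.PassContext(opt_level=3, instruments=[timing_inst]):
        mod = relay.transform.InferType()(mod)
        mod = relay.transform.FoldScaleAxis()(mod)
        # render() must be called while the PassContext is still active.
        profile = timing_inst.render()
    print("Printing results of timing profile...")
    print(profile)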
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
index 6f018adc08..79561b73aa 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_cuda.rst.txt
@@ -331,7 +331,7 @@ latency of convolution.
 
  .. code-block:: none
 
-    Convolution: 54.126304 ms
+    Convolution: 41.779296 ms
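
Latency lines like the one above are measured with ``time_evaluator``. A runnable sketch of the same measurement pattern, shown on a trivial elementwise CUDA kernel instead of the tutorial's convolution (which needs a much longer setup):

.. code-block:: python

    import numpy as np
    import tvm
    from tvm import te

    # Build a trivial GPU kernel to demonstrate the measurement pattern.
    n = 1 << 20
    A = te.placeholder((n,), name="A")
    B = te.compute((n,), lambda i: A[i] * 2.0, name="B")
    s = te.create_schedule(B.op)
    bx, tx = s[B].split(B.op.axis[0], factor=64)
    s[B].bind(bx, te.thread_axis("blockIdx.x"))
    s[B].bind(tx, te.thread_axis("threadIdx.x"))
    func = tvm.build(s, [A, B], target="cuda")

    # time_evaluator averages over `number` runs and reports seconds.
    dev = tvm.cuda(0)
    a = tvm.nd.array(np.random.rand(n).astype("float32"), dev)
    b = tvm.nd.array(np.zeros(n, dtype="float32"), dev)
    evaluator = func.time_evaluator(func.entry_name, dev, number=10)
    print("Elementwise: %f ms" % (evaluator(a, b).mean * 1e3))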
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
index 921714fdee..68d9ef3608 100644
--- a/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_conv_tensorcore.rst.txt
@@ -608,7 +608,7 @@ be able to run on our build server
 
  .. code-block:: none
 
-    conv2d with tensor core: 6.691885 ms
+    conv2d with tensor core: 7.157965 ms
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
index 8f310f60de..c9ab053a09 100644
--- a/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/opt_gemm.rst.txt
@@ -134,8 +134,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 
  .. code-block:: none
 
-    Numpy running time: 0.019105
-    Baseline: 3.258741
+    Numpy running time: 0.018668
+    Baseline: 3.404827
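
Both numbers time a 1024x1024 float32 matrix multiply; the first is plain numpy, the second the unscheduled TE implementation. A sketch of the numpy half of the measurement:

.. code-block:: python

    import timeit
    import numpy as np

    # Sizes follow the opt_gemm tutorial: a 1024x1024 float32 matmul.
    M = K = N = 1024
    a = np.random.rand(M, K).astype("float32")
    b = np.random.rand(K, N).astype("float32")
    t = timeit.timeit(lambda: np.dot(a, b), number=10) / 10
    print("Numpy running time: %f" % t)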
 
 
 
@@ -227,7 +227,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 
  .. code-block:: none
 
-    Opt1: 0.298000
+    Opt1: 0.304255
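
The Opt1 figure corresponds to the tutorial's 32x32 blocking, with the reduction axis split by a factor of 4. A self-contained sketch of that schedule:

.. code-block:: python

    import tvm
    from tvm import te

    # 32x32 blocking: each block of C fills 32*32*sizeof(float) = 4KB,
    # comfortably inside a 32KB L1 cache; the reduction axis is split by 4.
    M = K = N = 1024
    bn = 32
    k = te.reduce_axis((0, K), "k")
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    C = te.compute((M, N), lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name="C")

    s = te.create_schedule(C.op)
    xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
    ko, ki = s[C].split(s[C].op.reduce_axis[0], factor=4)
    s[C].reorder(xo, yo, ko, ki, xi, yi)
    func = tvm.build(s, [A, B, C], target="llvm", name="mmult")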
 
 
 
@@ -318,7 +318,7 @@ In this tutorial, we chose to vectorize the inner loop row data since it is cach
 
  .. code-block:: none
 
-    Opt2: 0.332539
+    Opt2: 0.339157
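
Opt2 keeps the blocked schedule and adds a single directive, vectorizing the inner row axis so it lowers to SIMD loads and stores. Sketch:

.. code-block:: python

    import tvm
    from tvm import te

    # Same declaration and blocking as the Opt1 sketch; the one new
    # scheduling line is the vectorize() on the inner row axis yi.
    M = K = N = 1024
    bn = 32
    k = te.reduce_axis((0, K), "k")
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    C = te.compute((M, N), lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name="C")

    s = te.create_schedule(C.op)
    xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
    ko, ki = s[C].split(s[C].op.reduce_axis[0], factor=4)
    s[C].reorder(xo, yo, ko, ki, xi, yi)
    s[C].vectorize(yi)  # lower the innermost loop to SIMD
    func = tvm.build(s, [A, B, C], target="llvm", name="mmult")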
 
 
 
@@ -406,7 +406,7 @@ the access pattern for A matrix is more cache friendly.
 
  .. code-block:: none
 
-    Opt3: 0.114561
+    Opt3: 0.117516
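
Opt3 only permutes the loop order: the inner block row ``xi`` is hoisted above the split reduction axis ``ki``, so consecutive iterations walk A along a row. Sketch:

.. code-block:: python

    import tvm
    from tvm import te

    M = K = N = 1024
    bn = 32
    k = te.reduce_axis((0, K), "k")
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    C = te.compute((M, N), lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name="C")

    s = te.create_schedule(C.op)
    xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
    ko, ki = s[C].split(s[C].op.reduce_axis[0], factor=4)
    # xi hoisted above ki: A[x, k] is now read sequentially along rows.
    s[C].reorder(xo, yo, ko, xi, ki, yi)
    s[C].vectorize(yi)
    func = tvm.build(s, [A, B, C], target="llvm", name="mmult")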
 
 
 
@@ -523,7 +523,7 @@ flattening.
 
  .. code-block:: none
 
-    Opt4: 0.108607
+    Opt4: 0.109511
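
The Opt4 figure reflects the tutorial's array packing: B is repacked into a (N/bn, K, bn) layout so the innermost dimension is read sequentially. A sketch, keeping the Opt3 blocking and vectorization:

.. code-block:: python

    import tvm
    from tvm import te

    M = K = N = 1024
    bn = 32
    k = te.reduce_axis((0, K), "k")
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    # Repack B so that the last, fastest-varying dimension has size bn and
    # is traversed contiguously by the vectorized inner loop.
    packedB = te.compute(
        (N // bn, K, bn), lambda xb, kk, yb: B[kk, xb * bn + yb], name="packedB"
    )
    C = te.compute(
        (M, N),
        lambda x, y: te.sum(A[x, k] * packedB[y // bn, k, tvm.tir.indexmod(y, bn)], axis=k),
        name="C",
    )

    s = te.create_schedule(C.op)
    xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
    ko, ki = s[C].split(s[C].op.reduce_axis[0], factor=4)
    s[C].reorder(xo, yo, ko, xi, ki, yi)
    s[C].vectorize(yi)
    func = tvm.build(s, [A, B, C], target="llvm", name="mmult")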
 
 
 
@@ -635,7 +635,7 @@ write to C when all the block results are ready.
 
  .. code-block:: none
 
-    Opt5: 0.111281
+    Opt5: 0.110792
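
Opt5 accumulates each output block in a small write cache and flushes it to C only once all partial sums for the block are ready. A sketch of the caching schedule (the tutorial also keeps the packed B layout, omitted here for brevity):

.. code-block:: python

    import tvm
    from tvm import te

    M = K = N = 1024
    bn = 32
    k = te.reduce_axis((0, K), "k")
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    C = te.compute((M, N), lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name="C")

    s = te.create_schedule(C.op)
    # Accumulate each 32x32 block in a write cache CC, written back to C
    # after the reduction over k has finished.
    CC = s.cache_write(C, "global")
    xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
    s[CC].compute_at(s[C], yo)
    xc, yc = s[CC].op.axis
    ko, ki = s[CC].split(s[CC].op.reduce_axis[0], factor=4)
    s[CC].reorder(ko, xc, ki, yc)
    s[CC].unroll(ki)
    s[CC].vectorize(yc)
    func = tvm.build(s, [A, B, C], target="llvm", name="mmult")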
 
 
 
@@ -748,7 +748,7 @@ Furthermore, we can also utilize multi-core processors to do the thread-level pa
 
  .. code-block:: none
 
-    Opt6: 0.146370
+    Opt6: 0.146887
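
Opt6 distributes the outermost blocked axis across CPU cores with a single ``parallel`` directive (the tutorial applies it on top of the full Opt5 schedule). A minimal sketch of the directive:

.. code-block:: python

    import tvm
    from tvm import te

    M = K = N = 1024
    bn = 32
    k = te.reduce_axis((0, K), "k")
    A = te.placeholder((M, K), name="A")
    B = te.placeholder((K, N), name="B")
    C = te.compute((M, N), lambda x, y: te.sum(A[x, k] * B[k, y], axis=k), name="C")

    s = te.create_schedule(C.op)
    xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
    s[C].parallel(xo)  # distribute rows of 32x32 blocks across CPU cores
    func = tvm.build(s, [A, B, C], target="llvm", name="mmult")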
 
 
 
diff --git a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
index 8552524fd5..b93fd49015 100644
--- a/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/optimize_operators/sg_execution_times.rst.txt
@@ -5,12 +5,12 @@
 
 Computation times
 =================
-**00:34.377** total execution time for **how_to_optimize_operators** files:
+**00:34.897** total execution time for **how_to_optimize_operators** files:
 
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:31.833 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_gemm.py` (``opt_gemm.py``)                       | 00:32.409 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.469 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_tensorcore.py` (``opt_conv_tensorcore.py``) | 00:01.437 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.076 | 0.0 MB |
+| :ref:`sphx_glr_how_to_optimize_operators_opt_conv_cuda.py` (``opt_conv_cuda.py``)             | 00:01.052 | 0.0 MB |
 +-----------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
index 6a6f84c082..4679a99f3e 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/sg_execution_times.rst.txt
@@ -5,18 +5,18 @@
 
 Computation times
 =================
-**09:08.538** total execution time for **how_to_tune_with_autoscheduler** files:
+**09:18.502** total execution time for **how_to_tune_with_autoscheduler** files:
 
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 05:29.796 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py` (``tune_conv2d_layer_cuda.py``) | 05:39.006 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:37.055 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_x86.py` (``tune_network_x86.py``)             | 01:38.823 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 01:04.667 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_cuda.py` (``tune_network_cuda.py``)           | 01:06.225 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:30.964 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_sparse_x86.py` (``tune_sparse_x86.py``)               | 00:27.877 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:13.513 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_arm.py` (``tune_network_arm.py``)             | 00:13.863 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:12.544 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autoscheduler_tune_network_mali.py` (``tune_network_mali.py``)           | 00:12.707 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
index 17c88d7272..b50e9be73a 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.rst.txt
@@ -245,13 +245,13 @@ cooperative fetching, unrolling and operator fusion.
             bias_1 = T.match_buffer(bias, (1, 512, 1, 1))
             compute_1 = T.match_buffer(compute, (1, 512, 7, 7))
             blockIdx_x = T.env_thread("blockIdx.x")
-            T.launch_thread(blockIdx_x, 28)
-            conv2d_nchw = T.allocate([14], "float32", "local")
-            pad_temp_shared = T.allocate([72], "float32", "shared")
-            kernel_shared = T.allocate([3072], "float32", "shared")
+            T.launch_thread(blockIdx_x, 16)
+            conv2d_nchw = T.allocate([8], "float32", "local")
+            pad_temp_shared = T.allocate([1008], "float32", "shared")
+            kernel_shared = T.allocate([1536], "float32", "shared")
             threadIdx_x = T.env_thread("threadIdx.x")
-            T.launch_thread(threadIdx_x, 64)
-            conv2d_nchw_1 = T.buffer_decl((14,), data=conv2d_nchw, scope="local", align=32)
+            T.launch_thread(threadIdx_x, 196)
+            conv2d_nchw_1 = T.buffer_decl((1,), data=conv2d_nchw, scope="local", align=4)
             conv2d_nchw_1[0] = T.float32(0)
             conv2d_nchw_1[1] = T.float32(0)
             conv2d_nchw_1[2] = T.float32(0)
@@ -260,466 +260,40 @@ cooperative fetching, unrolling and operator fusion.
             conv2d_nchw_1[5] = T.float32(0)
             conv2d_nchw_1[6] = T.float32(0)
             conv2d_nchw_1[7] = T.float32(0)
-            conv2d_nchw_1[8] = T.float32(0)
-            conv2d_nchw_1[9] = T.float32(0)
-            conv2d_nchw_1[10] = T.float32(0)
-            conv2d_nchw_1[11] = T.float32(0)
-            conv2d_nchw_1[12] = T.float32(0)
-            conv2d_nchw_1[13] = T.float32(0)
-            for rc_outer_outer, ry_outer_outer in T.grid(64, 3):
-                cse_var_2: T.int32 = rc_outer_outer * 72
-                cse_var_1: T.int32 = ry_outer_outer * 3
-                threadIdx_x_1 = T.env_thread("threadIdx.x")
-                pad_temp_shared_1 = T.buffer_decl((72,), data=pad_temp_shared, scope="shared")
-                with T.launch_thread(threadIdx_x_1, 64):
-                    data_2 = T.buffer_decl((25088,), data=data_1.data)
-                    if T.likely(threadIdx_x_1 < 18):
-                        pad_temp_shared_1[threadIdx_x_1 * 4] = T.if_then_else(1 <= ry_outer_outer + blockIdx_x % 7 and ry_outer_outer + blockIdx_x % 7 < 8 and 1 <= threadIdx_x_1 * 4 % 9 and threadIdx_x_1 * 4 % 9 < 8, data_2[rc_outer_outer * 392 + threadIdx_x_1 * 4 // 9 * 49 + ry_outer_outer * 7 + blockIdx_x % 7 * 7 + threadIdx_x_1 * 4 % 9 - 8], T.float32(0))
-                    if T.likely(threadIdx_x_1 < 18):
-                        pad_temp_shared_1[threadIdx_x_1 * 4 + 1] = T.if_then_else(1 <= ry_outer_outer + blockIdx_x % 7 and ry_outer_outer + blockIdx_x % 7 < 8 and 1 <= (threadIdx_x_1 * 4 + 1) % 9 and (threadIdx_x_1 * 4 + 1) % 9 < 8, data_2[rc_outer_outer * 392 + (threadIdx_x_1 * 4 + 1) // 9 * 49 + ry_outer_outer * 7 + blockIdx_x % 7 * 7 + (threadIdx_x_1 * 4 + 1) % 9 - 8], T.float32(0))
-                    if T.likely(threadIdx_x_1 < 18):
-                        pad_temp_shared_1[threadIdx_x_1 * 4 + 2] = T.if_then_else(1 <= ry_outer_outer + blockIdx_x % 7 and ry_outer_outer + blockIdx_x % 7 < 8 and 1 <= (threadIdx_x_1 * 4 + 2) % 9 and (threadIdx_x_1 * 4 + 2) % 9 < 8, data_2[rc_outer_outer * 392 + (threadIdx_x_1 * 4 + 2) // 9 * 49 + ry_outer_outer * 7 + blockIdx_x % 7 * 7 + (threadIdx_x_1 * 4 + 2) % 9 - 8], T.float32(0))
-                    if T.likely(threadIdx_x_1 < 18):
-                        pad_temp_shared_1[threadIdx_x_1 * 4 + 3] = T.if_then_else(1 <= ry_outer_outer + blockIdx_x % 7 and ry_outer_outer + blockIdx_x % 7 < 8 and 1 <= (threadIdx_x_1 * 4 + 3) % 9 and (threadIdx_x_1 * 4 + 3) % 9 < 8, data_2[rc_outer_outer * 392 + (threadIdx_x_1 * 4 + 3) // 9 * 49 + ry_outer_outer * 7 + blockIdx_x % 7 * 7 + (threadIdx_x_1 * 4 + 3) % 9 - 8], T.float32(0))
-                threadIdx_x_2 = T.env_thread("threadIdx.x")
-                kernel_shared_1 = T.buffer_decl((3072,), data=kernel_shared, scope="shared")
-                kernel_2 = T.buffer_decl((2359296,), data=kernel_1.data)
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 64] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 64) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 128] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 128) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 192] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 36864]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 256] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 256) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 320] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 320) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 384] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 73728]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 448] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 448) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 512] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 512) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 576] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 110592]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 640] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 640) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 704] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 704) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 768] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 147456]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 832] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 832) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 896] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 896) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 960] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 184320]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1024] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1024) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1088] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1088) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1152] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 221184]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1216] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1216) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1280] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1280) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1344] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 258048]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1408] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1408) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1472] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1472) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1536] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 294912]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1600] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1600) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1664] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1664) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1728] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 331776]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1792] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1792) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1856] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1856) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1920] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 368640]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 1984] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1984) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2048] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2048) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2112] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 405504]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2176] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2176) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2240] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2240) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2304] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 442368]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2368] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2368) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2432] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2432) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2496] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 479232]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2560] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2560) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2624] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2624) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2688] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 516096]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2752] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2752) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2816] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2816) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2880] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 552960]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 2944] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2944) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-                with T.launch_thread(threadIdx_x_2, 64):
-                    kernel_shared_1[threadIdx_x_2 + 3008] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 3008) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[0] * kernel_shared_1[threadIdx_x * 48]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[9] * kernel_shared_1[threadIdx_x * 48 + 3]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[1] * kernel_shared_1[threadIdx_x * 48]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[10] * kernel_shared_1[threadIdx_x * 48 + 3]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 3]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 3]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 3]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 3]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 3]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[0] * kernel_shared_1[threadIdx_x * 48 + 24]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[9] * kernel_shared_1[threadIdx_x * 48 + 27]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[1] * kernel_shared_1[threadIdx_x * 48 + 24]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[10] * kernel_shared_1[threadIdx_x * 48 + 27]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48 + 24]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 27]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48 + 24]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 27]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48 + 24]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 27]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48 + 24]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 27]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48 + 24]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 27]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[1] * kernel_shared_1[threadIdx_x * 48 + 1]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[10] * kernel_shared_1[threadIdx_x * 48 + 4]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48 + 1]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 4]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48 + 1]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 4]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48 + 1]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 4]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48 + 1]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 4]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48 + 1]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 4]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[7] * kernel_shared_1[threadIdx_x * 48 + 1]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[16] * kernel_shared_1[threadIdx_x * 48 + 4]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[1] * kernel_shared_1[threadIdx_x * 48 + 25]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[10] * kernel_shared_1[threadIdx_x * 48 + 28]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48 + 25]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 28]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48 + 25]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 28]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48 + 25]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 28]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48 + 25]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 28]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48 + 25]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 28]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[7] * kernel_shared_1[threadIdx_x * 48 + 25]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[16] * kernel_shared_1[threadIdx_x * 48 + 28]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48 + 2]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 5]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48 + 2]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 5]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48 + 2]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 5]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48 + 2]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 5]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48 + 2]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 5]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[7] * kernel_shared_1[threadIdx_x * 48 + 2]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[16] * kernel_shared_1[threadIdx_x * 48 + 5]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[8] * kernel_shared_1[threadIdx_x * 48 + 2]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[17] * kernel_shared_1[threadIdx_x * 48 + 5]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48 + 26]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 29]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48 + 26]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 29]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48 + 26]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 29]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48 + 26]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 29]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48 + 26]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 29]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[7] * kernel_shared_1[threadIdx_x * 48 + 26]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[16] * kernel_shared_1[threadIdx_x * 48 + 29]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[8] * kernel_shared_1[threadIdx_x * 48 + 26]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[17] * kernel_shared_1[threadIdx_x * 48 + 29]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[18] * kernel_shared_1[threadIdx_x * 48 + 6]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[27] * kernel_shared_1[threadIdx_x * 48 + 9]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[19] * kernel_shared_1[threadIdx_x * 48 + 6]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[28] * kernel_shared_1[threadIdx_x * 48 + 9]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 6]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 9]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 6]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 9]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 6]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 9]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 6]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 9]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 6]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 9]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[18] * kernel_shared_1[threadIdx_x * 48 + 30]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[27] * kernel_shared_1[threadIdx_x * 48 + 33]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[19] * kernel_shared_1[threadIdx_x * 48 + 30]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[28] * kernel_shared_1[threadIdx_x * 48 + 33]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 30]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 33]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 30]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 33]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 30]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 33]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 30]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 33]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 30]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 33]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[19] * kernel_shared_1[threadIdx_x * 48 + 7]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[28] * kernel_shared_1[threadIdx_x * 48 + 10]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 7]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 10]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 7]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 10]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 7]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 10]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 7]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 10]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 7]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 10]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[25] * kernel_shared_1[threadIdx_x * 48 + 7]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[34] * kernel_shared_1[threadIdx_x * 48 + 10]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[19] * kernel_shared_1[threadIdx_x * 48 + 31]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[28] * kernel_shared_1[threadIdx_x * 48 + 34]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 31]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 34]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 31]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 34]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 31]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 34]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 31]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 34]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 31]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 34]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[25] * kernel_shared_1[threadIdx_x * 48 + 31]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[34] * kernel_shared_1[threadIdx_x * 48 + 34]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 8]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 11]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 8]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 11]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 8]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 11]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 8]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 11]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 8]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 11]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[25] * kernel_shared_1[threadIdx_x * 48 + 8]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[34] * kernel_shared_1[threadIdx_x * 48 + 11]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[26] * kernel_shared_1[threadIdx_x * 48 + 8]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[35] * kernel_shared_1[threadIdx_x * 48 + 11]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 32]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 35]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 32]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 35]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 32]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 35]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 32]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 35]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 32]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 35]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[25] * kernel_shared_1[threadIdx_x * 48 + 32]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[34] * kernel_shared_1[threadIdx_x * 48 + 35]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[26] * kernel_shared_1[threadIdx_x * 48 + 32]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[35] * kernel_shared_1[threadIdx_x * 48 + 35]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[36] * kernel_shared_1[threadIdx_x * 48 + 12]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[45] * kernel_shared_1[threadIdx_x * 48 + 15]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[37] * kernel_shared_1[threadIdx_x * 48 + 12]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[46] * kernel_shared_1[threadIdx_x * 48 + 15]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 12]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 15]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 12]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 15]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 12]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 15]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 12]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 15]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 12]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 15]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[36] * kernel_shared_1[threadIdx_x * 48 + 36]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[45] * kernel_shared_1[threadIdx_x * 48 + 39]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[37] * kernel_shared_1[threadIdx_x * 48 + 36]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[46] * kernel_shared_1[threadIdx_x * 48 + 39]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 36]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 39]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 36]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 39]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 36]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 39]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 36]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 39]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 36]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 39]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[37] * kernel_shared_1[threadIdx_x * 48 + 13]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[46] * kernel_shared_1[threadIdx_x * 48 + 16]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 13]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 16]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 13]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 16]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 13]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 16]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 13]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 16]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 13]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 16]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[43] * kernel_shared_1[threadIdx_x * 48 + 13]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[52] * kernel_shared_1[threadIdx_x * 48 + 16]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[37] * kernel_shared_1[threadIdx_x * 48 + 37]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[46] * kernel_shared_1[threadIdx_x * 48 + 40]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 37]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 40]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 37]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 40]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 37]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 40]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 37]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 40]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 37]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 40]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[43] * kernel_shared_1[threadIdx_x * 48 + 37]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[52] * kernel_shared_1[threadIdx_x * 48 + 40]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 14]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 17]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 14]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 17]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 14]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 17]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 14]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 17]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 14]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 17]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[43] * kernel_shared_1[threadIdx_x * 48 + 14]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[52] * kernel_shared_1[threadIdx_x * 48 + 17]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[44] * kernel_shared_1[threadIdx_x * 48 + 14]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[53] * kernel_shared_1[threadIdx_x * 48 + 17]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 38]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 41]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 38]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 41]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 38]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 41]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 38]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 41]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 38]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 41]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[43] * kernel_shared_1[threadIdx_x * 48 + 38]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[52] * kernel_shared_1[threadIdx_x * 48 + 41]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[44] * kernel_shared_1[threadIdx_x * 48 + 38]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[53] * kernel_shared_1[threadIdx_x * 48 + 41]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[54] * kernel_shared_1[threadIdx_x * 48 + 18]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[63] * kernel_shared_1[threadIdx_x * 48 + 21]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[55] * kernel_shared_1[threadIdx_x * 48 + 18]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[64] * kernel_shared_1[threadIdx_x * 48 + 21]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 18]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 21]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 18]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 21]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 18]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 21]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 18]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 21]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 18]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 21]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[54] * kernel_shared_1[threadIdx_x * 48 + 42]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[63] * kernel_shared_1[threadIdx_x * 48 + 45]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[55] * kernel_shared_1[threadIdx_x * 48 + 42]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[64] * kernel_shared_1[threadIdx_x * 48 + 45]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 42]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 45]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 42]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 45]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 42]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 45]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 42]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 45]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 42]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 45]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[55] * kernel_shared_1[threadIdx_x * 48 + 19]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[64] * kernel_shared_1[threadIdx_x * 48 + 22]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 19]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 22]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 19]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 22]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 19]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 22]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 19]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 22]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 19]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 22]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[61] * kernel_shared_1[threadIdx_x * 48 + 19]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[70] * kernel_shared_1[threadIdx_x * 48 + 22]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[55] * kernel_shared_1[threadIdx_x * 48 + 43]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[64] * kernel_shared_1[threadIdx_x * 48 + 46]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 43]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 46]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 43]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 46]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 43]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 46]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 43]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 46]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 43]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 46]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[61] * kernel_shared_1[threadIdx_x * 48 + 43]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[70] * kernel_shared_1[threadIdx_x * 48 + 46]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 20]
-                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 23]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 20]
-                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 23]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 20]
-                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 23]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 20]
-                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 23]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 20]
-                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 23]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[61] * kernel_shared_1[threadIdx_x * 48 + 20]
-                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[70] * kernel_shared_1[threadIdx_x * 48 + 23]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[62] * kernel_shared_1[threadIdx_x * 48 + 20]
-                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[71] * kernel_shared_1[threadIdx_x * 48 + 23]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 44]
-                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 47]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 44]
-                conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 47]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 44]
-                conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 47]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 44]
-                conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 47]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 44]
-                conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 47]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[61] * kernel_shared_1[threadIdx_x * 48 + 44]
-                conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[70] * kernel_shared_1[threadIdx_x * 48 + 47]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[62] * kernel_shared_1[threadIdx_x * 48 + 44]
-                conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[71] * kernel_shared_1[threadIdx_x * 48 + 47]
-            for i1_inner, i3_inner in T.grid(2, 7):
-                compute_2 = T.buffer_decl((25088,), data=compute_1.data)
-                bias_2 = T.buffer_decl((512,), data=bias_1.data)
-                compute_2[blockIdx_x // 7 * 6272 + threadIdx_x * 98 + i1_inner * 49 + blockIdx_x % 7 * 7 + i3_inner] = T.max(conv2d_nchw_1[i1_inner * 7 + i3_inner] + bias_2[blockIdx_x // 7 * 128 + threadIdx_x * 2 + i1_inner], T.float32(0))
+            for rc_outer_outer, rx_outer_outer in T.grid(32, 3):
+                pad_temp_shared_1 = T.buffer_decl((1008,), data=pad_temp_shared, scope="shared")
+                for ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer in range(6):
+                    threadIdx_x_1 = T.env_thread("threadIdx.x")
+                    T.launch_thread(threadIdx_x_1, 196)
+                    if T.likely(ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 7 + threadIdx_x_1 // 28 < 36):
+                        data_2 = T.buffer_decl((25088,), data=data_1.data)
+                        pad_temp_shared_1[ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 196 + threadIdx_x_1] = T.if_then_else(1 <= (threadIdx_x_1 // 7 + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) % 9 and (threadIdx_x_1 // 7 + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) % 9 < 8 and 1 <= rx_outer_outer + threadIdx_x_1 % 7 and rx_outer_outer + threadIdx_x_1 % 7 < 8, data_2[rc_outer_outer * 784 + (ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 28 + threadIdx_x_1 // 7) // 9 * 49 + ( [...]
+                kernel_shared_1 = T.buffer_decl((1536,), data=kernel_shared, scope="shared")
+                for ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer in range(8):
+                    threadIdx_x_1 = T.env_thread("threadIdx.x")
+                    T.launch_thread(threadIdx_x_1, 196)
+                    if T.likely(ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 49 + threadIdx_x_1 // 4 < 384):
+                        kernel_2 = T.buffer_decl((2359296,), data=kernel_1.data)
+                        kernel_shared_1[ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 196 + threadIdx_x_1] = kernel_2[blockIdx_x * 147456 + (ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 49 + threadIdx_x_1 // 4) // 12 * 4608 + rc_outer_outer * 144 + (ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 4 + threadIdx_x_1) % 48 // 3 * 9 + (threadIdx_x_1 + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) % 3 * 3 + rx_outer_outer]
+                for rc_outer_inner, ry_outer_inner, rc_inner in T.grid(8, 3, 2):
+                    conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner]
+                    conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 192]
+                    conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 384]
+                    conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 576]
+                    conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 768]
+                    conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 960]
+                    conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 1152]
+                    conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 1344]
+            compute_2 = T.buffer_decl((25088,), data=compute_1.data)
+            bias_2 = T.buffer_decl((512,), data=bias_1.data)
+            compute_2[blockIdx_x * 1568 + threadIdx_x] = T.max(conv2d_nchw_1[0] + bias_2[blockIdx_x * 32 + threadIdx_x // 49], T.float32(0))
+            compute_2[blockIdx_x * 1568 + threadIdx_x + 196] = T.max(conv2d_nchw_1[1] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 4], T.float32(0))
+            compute_2[blockIdx_x * 1568 + threadIdx_x + 392] = T.max(conv2d_nchw_1[2] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 8], T.float32(0))
+            compute_2[blockIdx_x * 1568 + threadIdx_x + 588] = T.max(conv2d_nchw_1[3] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 12], T.float32(0))
+            compute_2[blockIdx_x * 1568 + threadIdx_x + 784] = T.max(conv2d_nchw_1[4] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 16], T.float32(0))
+            compute_2[blockIdx_x * 1568 + threadIdx_x + 980] = T.max(conv2d_nchw_1[5] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 20], T.float32(0))
+            compute_2[blockIdx_x * 1568 + threadIdx_x + 1176] = T.max(conv2d_nchw_1[6] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 24], T.float32(0))
+            compute_2[blockIdx_x * 1568 + threadIdx_x + 1372] = T.max(conv2d_nchw_1[7] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 28], T.float32(0))
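The eight stores at the end of the TIR above are the fused bias-add and ReLU
epilogue: each of the 196 threads in a block writes the same pixel of eight
output channels, strided by 196 elements (four 7x7 feature maps of 49 elements
each). A small consistency check of that flat indexing, using only the loop
bounds visible in the code above:

.. code-block:: python

    # Sanity check: compute[blockIdx*1568 + t + k*196] should pair with
    # bias[blockIdx*32 + t//49 + k*4], as in the TIR stores above.
    for k in range(8):          # per-thread output channels conv2d_nchw_1[0..7]
        for t in range(196):    # threadIdx.x
            flat = t + k * 196  # offset inside the block's 1568 outputs
            channel = flat // 49   # each 7x7 feature map holds 49 elements
            pixel = flat % 49
            assert channel == t // 49 + k * 4  # matches the bias index above
            assert pixel == t % 49             # same spatial position for all k
    print("output indexing consistent")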
 
 
 
@@ -769,7 +343,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 0.354 ms
+    Execution time of this operator: 0.386 ms
 
 
 
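For reference, timings like the one above come from TVM's ``time_evaluator``,
which runs the compiled function repeatedly and reports statistics over the
repeats. A minimal, self-contained sketch on a trivial kernel (so it runs
without the tuning log; the tuned conv2d is measured the same way):

.. code-block:: python

    import numpy as np
    import tvm
    from tvm import te

    # Build a trivial elementwise kernel to measure.
    A = te.placeholder((1024,), name="A")
    B = te.compute((1024,), lambda i: A[i] + 1.0, name="B")
    s = te.create_schedule(B.op)
    func = tvm.build(s, [A, B], target="llvm")

    dev = tvm.cpu()
    a = tvm.nd.array(np.random.uniform(size=1024).astype("float32"), dev)
    b = tvm.nd.array(np.zeros(1024, dtype="float32"), dev)

    # min_repeat_ms keeps re-running each repeat until at least 500 ms has
    # elapsed; the median over repeats gives a stable number like the one above.
    evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
    print("Execution time: %.3f ms" % (np.median(evaluator(a, b).results) * 1000))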
@@ -818,35 +392,35 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
     conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
     conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
-    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
-    conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
+    conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
+    conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=4)
+    conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=8)
     conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
     conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
+    conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
     conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
     conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
-    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
+    conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
+    conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
     conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
     conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
-    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
+    conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=8)
     conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
-    conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
+    conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=3)
     conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
-    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
+    conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
     s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2 [...]
     compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
     compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
     compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
-    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
-    compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
+    compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
+    compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=4)
+    compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=8)
     compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
-    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+    compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
     compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
-    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
+    compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
+    compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
     compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
     s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
     s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -866,14 +440,14 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
     kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+    kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=196)
     s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
     pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
     s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+    pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=196)
     s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis("threadIdx.x"))
-    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 512)
+    s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "auto_unroll_max_step", 0)
     s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, "unroll_explicit", True)
 
     CUDA source code:
@@ -891,10 +465,10 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       #define int64_t long long
       #define uint64_t unsigned long long
     #endif
-    extern "C" __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-      float conv2d_nchw[14];
-      __shared__ float pad_temp_shared[72];
-      __shared__ float kernel_shared[3072];
+    extern "C" __global__ void __launch_bounds__(196) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+      float conv2d_nchw[8];
+      __shared__ float pad_temp_shared[1008];
+      __shared__ float kernel_shared[1536];
       conv2d_nchw[0] = 0.000000e+00f;
       conv2d_nchw[1] = 0.000000e+00f;
       conv2d_nchw[2] = 0.000000e+00f;
@@ -903,419 +477,44 @@ They can be used for debugging and learning the behavior of the auto-scheduler.
       conv2d_nchw[5] = 0.000000e+00f;
       conv2d_nchw[6] = 0.000000e+00f;
       conv2d_nchw[7] = 0.000000e+00f;
-      conv2d_nchw[8] = 0.000000e+00f;
-      conv2d_nchw[9] = 0.000000e+00f;
-      conv2d_nchw[10] = 0.000000e+00f;
-      conv2d_nchw[11] = 0.000000e+00f;
-      conv2d_nchw[12] = 0.000000e+00f;
-      conv2d_nchw[13] = 0.000000e+00f;
-      for (int rc_outer_outer = 0; rc_outer_outer < 64; ++rc_outer_outer) {
-        for (int ry_outer_outer = 0; ry_outer_outer < 3; ++ry_outer_outer) {
+      for (int rc_outer_outer = 0; rc_outer_outer < 32; ++rc_outer_outer) {
+        for (int rx_outer_outer = 0; rx_outer_outer < 3; ++rx_outer_outer) {
           __syncthreads();
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= ((((int)threadIdx.x) * 4) % 9))) && (((((int)threadIdx.x) * 4) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
+          for (int ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer = 0; ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer < 6; ++ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) {
+            if (((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 7) + (((int)threadIdx.x) / 28)) < 36) {
+              pad_temp_shared[((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 196) + ((int)threadIdx.x))] = (((((1 <= (((((int)threadIdx.x) / 7) + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) % 9)) && ((((((int)threadIdx.x) / 7) + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) % 9) < 8)) && (1 <= (rx_outer_outer + (((int)threadIdx.x) % 7)))) && ((rx_outer_outer + (((int)threadIdx.x) % 7)) < 8)) ? data[((((((rc_outer_outer * 784) + ((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 2 [...]
+            }
           }
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 1) % 9))) && ((((((int)threadIdx.x) * 4) + 1) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
+          for (int ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 = 0; ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 < 8; ++ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1) {
+            if (((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 * 49) + (((int)threadIdx.x) >> 2)) < 384) {
+              kernel_shared[((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 * 196) + ((int)threadIdx.x))] = kernel[((((((((int)blockIdx.x) * 147456) + ((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 * 49) + (((int)threadIdx.x) >> 2)) / 12) * 4608)) + (rc_outer_outer * 144)) + (((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 * 4) + ((int)threadIdx.x)) % 48) / 3) * 9)) + (((((int)threadIdx.x) + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1) % 3) * 3)) + rx_outer_outer)];
+            }
           }
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 2) % 9))) && ((((((int)threadIdx.x) * 4) + 2) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
-          }
-          if (((int)threadIdx.x) < 18) {
-            pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 <= (ry_outer_outer + (((int)blockIdx.x) % 7))) && ((ry_outer_outer + (((int)blockIdx.x) % 7)) < 8)) && (1 <= (((((int)threadIdx.x) * 4) + 3) % 9))) && ((((((int)threadIdx.x) * 4) + 3) % 9) < 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
-          }
-          kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
-          kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
-          kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
-          kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
-          kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
-          kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
-          kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
-          kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
-          kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
-          kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
-          kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
-          kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
-          kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
-          kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
-          kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
-          kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-          kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
           __syncthreads();
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-          conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-          conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-        }
-      }
-      for (int i1_inner = 0; i1_inner < 2; ++i1_inner) {
-        for (int i3_inner = 0; i3_inner < 7; ++i3_inner) {
-          compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
+          for (int rc_outer_inner = 0; rc_outer_inner < 8; ++rc_outer_inner) {
+            for (int ry_outer_inner = 0; ry_outer_inner < 3; ++ry_outer_inner) {
+              for (int rc_inner = 0; rc_inner < 2; ++rc_inner) {
+                conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[(((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner)]));
+                conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 192)]));
+                conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 384)]));
+                conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 576)]));
+                conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 768)]));
+                conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 960)]));
+                conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 1152)]));
+                conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 1344)]));
+              }
+            }
+          }
         }
       }
+      compute[((((int)blockIdx.x) * 1568) + ((int)threadIdx.x))] = max((conv2d_nchw[0] + bias[((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49))]), 0.000000e+00f);
+      compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 196)] = max((conv2d_nchw[1] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 4)]), 0.000000e+00f);
+      compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 392)] = max((conv2d_nchw[2] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 8)]), 0.000000e+00f);
+      compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 588)] = max((conv2d_nchw[3] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 12)]), 0.000000e+00f);
+      compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 784)] = max((conv2d_nchw[4] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 16)]), 0.000000e+00f);
+      compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 980)] = max((conv2d_nchw[5] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 20)]), 0.000000e+00f);
+      compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 1176)] = max((conv2d_nchw[6] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 24)]), 0.000000e+00f);
+      compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 1372)] = max((conv2d_nchw[7] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 28)]), 0.000000e+00f);
     }
 
 
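For context, the CUDA text in the hunk above is the kernel the tutorial prints
for the best record found during tuning. A minimal sketch of that step,
assuming the tutorial's ``task`` and ``log_file`` objects from earlier in the
script:

.. code-block:: python

    import tvm

    # Recover the best schedule from the tuning log and print it.
    sch, args = task.apply_best(log_file)
    print(tvm.lower(sch, args, simple_mode=True))        # lowered TIR
    print(task.print_best(log_file, print_mode="cuda"))  # CUDA source, as shown above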
@@ -1376,7 +575,7 @@ In the example below we resume the status and run 5 more trials.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 5 minutes  29.796 seconds)
+   **Total running time of the script:** ( 5 minutes  39.006 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_conv2d_layer_cuda.py:
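The timing change above falls in the tutorial's resume step ("resume the
status and run 5 more trials"). A minimal sketch of that step, assuming the
``task`` and ``log_file`` from earlier in the tutorial (the trial count and
callback choices mirror the tutorial; treat them as illustrative):

.. code-block:: python

    from tvm import auto_scheduler

    # Warm-start the cost model and search policy from the existing log,
    # then run a few more measurement trials.
    cost_model = auto_scheduler.XGBModel()
    cost_model.update_from_file(log_file)
    search_policy = auto_scheduler.SketchPolicy(
        task,
        program_cost_model=cost_model,
        init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)],
    )
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=5,
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    task.tune(tune_option, search_policy=search_policy)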
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
index 82a84a89fe..d0c4c827f3 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_cuda.rst.txt
@@ -647,7 +647,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-       7.8726       7.8762       7.8767       7.8649       0.0054   
+       7.8501       7.8517       7.8542       7.8445       0.0041   
                
 
 
@@ -675,7 +675,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  4.667 seconds)
+   **Total running time of the script:** ( 1 minutes  6.225 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_cuda.py:
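The execution-time summary updated above is produced after compiling the
network with the best schedules from the log. A minimal sketch, assuming the
tutorial's ``mod``, ``params``, ``target``, and ``log_file``; the input name
``"data"`` and shape here are placeholder assumptions:

.. code-block:: python

    import numpy as np
    import tvm
    from tvm import auto_scheduler, relay
    from tvm.contrib import graph_executor

    # Compile, applying the best schedules recorded during tuning.
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}
        ):
            lib = relay.build(mod, target=target, params=params)

    # Evaluate inference time, as in the summary table above.
    dev = tvm.device(str(target), 0)
    module = graph_executor.GraphModule(lib["default"](dev))
    module.set_input("data", np.random.uniform(size=(1, 3, 224, 224)).astype("float32"))
    print(module.benchmark(dev, repeat=3, min_repeat_ms=500))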
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
index fa1543e64b..1facf8429c 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_network_x86.rst.txt
@@ -666,7 +666,7 @@ so we can read the log file and load the best schedules.
     Evaluate inference time cost...
     Execution time summary:
      mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
-      744.6278     743.8504     746.7951     743.2380      1.5527   
+      753.1335     753.1278     753.5897     752.6829      0.3702   
                
 
 
@@ -694,7 +694,7 @@ Other Tips
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  37.055 seconds)
+   **Total running time of the script:** ( 1 minutes  38.823 seconds)
 
 
 .. _sphx_glr_download_how_to_tune_with_autoscheduler_tune_network_x86.py:
diff --git a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
index a318ead3ce..8696d460f2 100644
--- a/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
+++ b/docs/_sources/how_to/tune_with_autoscheduler/tune_sparse_x86.rst.txt
@@ -395,71 +395,22 @@ layout transformation, parallelization, vectorization, unrolling, and operator f
             for i0_outer_i1_outer_fused in T.parallel(32):
                 compute_2 = T.allocate([2048], "float32", "global")
                 compute_3 = T.buffer_decl((2048,), data=compute_2)
-                for i_outer_inner, nb_j_inner in T.grid(4, 2):
-                    for i_inner_init in range(16):
-                        cse_var_1: T.int32 = i_outer_inner * 512 + i_inner_init * 32 + nb_j_inner * 16
-                        compute_3[cse_var_1] = T.float32(0)
-                        compute_3[cse_var_1 + 1] = T.float32(0)
-                        compute_3[cse_var_1 + 2] = T.float32(0)
-                        compute_3[cse_var_1 + 3] = T.float32(0)
-                        compute_3[cse_var_1 + 4] = T.float32(0)
-                        compute_3[cse_var_1 + 5] = T.float32(0)
-                        compute_3[cse_var_1 + 6] = T.float32(0)
-                        compute_3[cse_var_1 + 7] = T.float32(0)
-                        compute_3[cse_var_1 + 8] = T.float32(0)
-                        compute_3[cse_var_1 + 9] = T.float32(0)
-                        compute_3[cse_var_1 + 10] = T.float32(0)
-                        compute_3[cse_var_1 + 11] = T.float32(0)
-                        compute_3[cse_var_1 + 12] = T.float32(0)
-                        compute_3[cse_var_1 + 13] = T.float32(0)
-                        compute_3[cse_var_1 + 14] = T.float32(0)
-                        compute_3[cse_var_1 + 15] = T.float32(0)
-                    for elem_idx, i_inner in T.grid(T.let(cse_var_2, i0_outer_i1_outer_fused % 16 * 2 + nb_j_inner, placeholder_10[cse_var_2 + 1] - placeholder_10[cse_var_2]), 16):
-                        cse_var_2 = T.var("int32")
+                for i_outer_inner in range(16):
+                    for i_inner_init, j_init in T.grid(8, 16):
+                        compute_3[i_outer_inner * 128 + i_inner_init * 16 + j_init] = T.float32(0)
+                    for elem_idx, i_inner, j in T.grid(placeholder_10[i0_outer_i1_outer_fused + 1] - placeholder_10[i0_outer_i1_outer_fused], 8, 16):
                         placeholder_10 = T.buffer_decl((33,), "int32", data=placeholder_8.data)
-                        cse_var_21: T.int32 = elem_idx * 16
-                        cse_var_20: T.int32 = i0_outer_i1_outer_fused % 16 * 2 + nb_j_inner
-                        cse_var_19: T.int32 = i_outer_inner * 512 + i_inner * 32 + nb_j_inner * 16
-                        cse_var_18: T.int32 = i0_outer_i1_outer_fused // 16 * 16384 + i_outer_inner * 4096 + i_inner * 256
-                        cse_var_17: T.int32 = cse_var_19 + 9
-                        cse_var_16: T.int32 = cse_var_19 + 8
-                        cse_var_15: T.int32 = cse_var_19 + 7
-                        cse_var_14: T.int32 = cse_var_19 + 6
-                        cse_var_13: T.int32 = cse_var_19 + 5
-                        cse_var_12: T.int32 = cse_var_19 + 4
-                        cse_var_11: T.int32 = cse_var_19 + 3
-                        cse_var_10: T.int32 = cse_var_19 + 2
-                        cse_var_9: T.int32 = cse_var_19 + 15
-                        cse_var_8: T.int32 = cse_var_19 + 14
-                        cse_var_7: T.int32 = cse_var_19 + 13
-                        cse_var_6: T.int32 = cse_var_19 + 12
-                        cse_var_5: T.int32 = cse_var_19 + 11
-                        cse_var_4: T.int32 = cse_var_19 + 10
-                        cse_var_3: T.int32 = cse_var_19 + 1
-                        placeholder_11 = T.buffer_decl((78656,), data=placeholder_6.data)
-                        placeholder_12 = T.buffer_decl((32768,), data=placeholder_5.data)
-                        placeholder_13 = T.buffer_decl((4916,), "int32", data=placeholder_7.data)
-                        compute_3[cse_var_19] = compute_3[cse_var_19] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_3] = compute_3[cse_var_3] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 1] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_10] = compute_3[cse_var_10] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 2] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_11] = compute_3[cse_var_11] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 3] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_12] = compute_3[cse_var_12] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 4] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_13] = compute_3[cse_var_13] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 5] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_14] = compute_3[cse_var_14] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 6] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_15] = compute_3[cse_var_15] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 7] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_16] = compute_3[cse_var_16] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 8] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_17] = compute_3[cse_var_17] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 9] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_4] = compute_3[cse_var_4] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 10] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_5] = compute_3[cse_var_5] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 11] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_6] = compute_3[cse_var_6] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 12] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_7] = compute_3[cse_var_7] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 13] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_8] = compute_3[cse_var_8] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 14] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                        compute_3[cse_var_9] = compute_3[cse_var_9] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 15] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                for i0_inner in range(64):
-                    cse_var_22: T.int32 = i0_outer_i1_outer_fused // 16 * 32768 + i0_inner * 512 + i0_outer_i1_outer_fused % 16 * 32
+                        if T.likely(elem_idx < placeholder_10[i0_outer_i1_outer_fused + 1] - placeholder_10[i0_outer_i1_outer_fused]):
+                            placeholder_11 = T.buffer_decl((78656,), data=placeholder_6.data)
+                            placeholder_12 = T.buffer_decl((32768,), data=placeholder_5.data)
+                            placeholder_13 = T.buffer_decl((4916,), "int32", data=placeholder_7.data)
+                            cse_var_1: T.int32 = i_outer_inner * 128 + i_inner * 16 + j
+                            compute_3[cse_var_1] = compute_3[cse_var_1] + placeholder_11[placeholder_10[i0_outer_i1_outer_fused] * 16 + elem_idx * 16 + j] * T.max(placeholder_12[i_outer_inner * 2048 + i_inner * 256 + placeholder_13[placeholder_10[i0_outer_i1_outer_fused] + elem_idx]], T.float32(0))
+                for i0_inner, i1_inner in T.grid(128, 16):
+                    cse_var_2: T.int32 = i0_inner * 512 + i0_outer_i1_outer_fused * 16 + i1_inner
                     compute_4 = T.buffer_decl((65536,), data=compute_1.data)
                     placeholder_10 = T.buffer_decl((65536,), data=placeholder_9.data)
-                    compute_4[cse_var_22:cse_var_22 + 32] = T.max(compute_3[i0_inner * 32:i0_inner * 32 + 32] + placeholder_10[cse_var_22:cse_var_22 + 32], T.Broadcast(T.float32(0), 32))
+                    compute_4[cse_var_2] = T.max(compute_3[i0_inner * 16 + i1_inner] + placeholder_10[cse_var_2], T.float32(0))
 
 
 
@@ -509,7 +460,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 1.716 ms
+    Execution time of this operator: 1.537 ms
 
 
 
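The operator time reported above comes from building the tuned sparse-dense
schedule and timing it with a ``time_evaluator``. A minimal sketch, assuming
``sch``, ``args``, ``target``, and the prepared input/output arrays from the
tutorial (the argument names below are assumptions that mirror ``args``):

.. code-block:: python

    import numpy as np
    import tvm

    # Build the tuned schedule and measure its median execution time.
    func = tvm.build(sch, args, target)
    dev = tvm.cpu()
    evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
    # Dense input, CSR weight arrays, bias, and output buffer.
    costs = evaluator(X_tvm, W_data_tvm, W_indices_tvm, W_indptr_tvm, B_tvm, Y_tvm).results
    print("Execution time of this operator: %.3f ms" % (np.median(costs) * 1000))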
diff --git a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
index 23e6ea39be..18ac775c13 100644
--- a/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/sg_execution_times.rst.txt
@@ -5,16 +5,16 @@
 
 Computation times
 =================
-**00:37.938** total execution time for **how_to_tune_with_autotvm** files:
+**00:53.823** total execution time for **how_to_tune_with_autotvm** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:37.906 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_conv2d_cuda.py` (``tune_conv2d_cuda.py``)           | 00:53.787 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.018 | 0.0 MB |
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_x86.py` (``tune_relay_x86.py``)               | 00:00.022 | 0.0 MB |
++--------------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``) | 00:00.005 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_cuda.py` (``tune_relay_cuda.py``)             | 00:00.005 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_arm.py` (``tune_relay_arm.py``)               | 00:00.004 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_tune_with_autotvm_tune_relay_mobile_gpu.py` (``tune_relay_mobile_gpu.py``) | 00:00.004 | 0.0 MB |
-+--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
index 6d90968ad0..7c8ca0a501 100644
--- a/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
+++ b/docs/_sources/how_to/tune_with_autotvm/tune_conv2d_cuda.rst.txt
@@ -268,8 +268,7 @@ for this template
     waiting for device...
     device available
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 111.35/111.35   result: MeasureResult(costs=(0.0020790124897959185,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7945833206176758, timestamp=1674022602.5026455)      [('tile_f', [-1, 1, 32, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,979530
-    No: 2   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    No: 1   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -391,8 +390,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 128, 1]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 4]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,8198569
-    No: 3   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 64, 1, 2]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7741861
+    No: 2   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -514,272 +513,162 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 16, 2]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 256, 2]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2580687
-    No: 4   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
-        func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
-        func = build(s, args, target_host=task.target_host, runtime=runtime)
-      File "/workspace/python/tvm/driver/build_module.py", line 227, in build
-        input_mod = lower(inputs, args, name=name, binds=binds)
-      File "/workspace/python/tvm/driver/build_module.py", line 134, in lower
-        return ffi.lower_schedule(inp, args, name, binds, simple_mode)
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 128, 2, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 128, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9935877
+    No: 3   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 742, in __call__
+        yield remote, remote.load_module(os.path.split(build_result.filename)[1])
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 706, in run_through_rpc
+        costs = time_f(*args).results
+      File "/workspace/python/tvm/runtime/module.py", line 357, in evaluator
+        blob = feval(*args)
       File "tvm/_ffi/_cython/./packed_func.pxi", line 331, in tvm._ffi._cy3.core.PackedFuncBase.__call__
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 276, in tvm._ffi._cy3.core.FuncCall
+      File "tvm/_ffi/_cython/./packed_func.pxi", line 262, in tvm._ffi._cy3.core.FuncCall
+      File "tvm/_ffi/_cython/./packed_func.pxi", line 251, in tvm._ffi._cy3.core.FuncCall3
       File "tvm/_ffi/_cython/./base.pxi", line 181, in tvm._ffi._cy3.core.CHECK_CALL
     tvm._ffi.base.TVMError: Traceback (most recent call last):
-      24: TVMFuncCall
+      4: TVMFuncCall
             at ../src/runtime/c_runtime_api.cc:477
-      23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      22: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      21: operator()
-            at ../include/tvm/runtime/packed_func.h:1730
-      20: unpack_call<tvm::IRModule, 5, tvm::<lambda(tvm::te::Schedule, const tvm::runtime::Array<tvm::runtime::ObjectRef>&, const tvm::runtime::String&, const tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer>&, bool)> >
-            at ../include/tvm/runtime/packed_func.h:1670
-      19: run<>
-            at ../include/tvm/runtime/packed_func.h:1630
-      18: run<tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      17: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      16: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      15: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      14: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1645
-      13: operator()
-            at ../src/driver/driver_api.cc:395
-      12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
-            at ../src/driver/driver_api.cc:381
-      11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
-            at ../src/driver/driver_api.cc:276
-      10: tvm::transform::Pass::operator()(tvm::IRModule) const
-            at ../src/ir/transform.cc:258
-      9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:451
-      7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/tir/ir/transform.cc:100
-      5: tvm::runtime::TypedPackedFunc<tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)>::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-            at ../include/tvm/runtime/packed_func.h:1749
-      4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher<tvm::tir::PrimFunc>::run<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::runtime::PackedFunc const&, tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&)
-            at ../include/tvm/runtime/packed_func.h:1693
-      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&) const
-            at ../include/tvm/runtime/packed_func.h:1617
-      2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      1: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      0: operator()
-            at ../src/runtime/c_runtime_api.cc:534
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
-        raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
+      3: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+            at ../include/tvm/runtime/packed_func.h:1217
+      2: tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+            at ../src/runtime/rpc/rpc_module.cc:129
+      1: tvm::runtime::RPCClientSession::CallFunc(void*, TVMValue const*, int const*, int, std::function<void (tvm::runtime::TVMArgs)> const&)
+            at ../src/runtime/rpc/rpc_endpoint.cc:1012
+      0: tvm::runtime::RPCEndpoint::CallFunc(void*, TVMValue const*, int const*, int, std::function<void (tvm::runtime::TVMArgs)>)
+            at ../src/runtime/rpc/rpc_endpoint.cc:804
+      File "../src/runtime/rpc/rpc_endpoint.cc", line 804
+    TVMError: 
+    ---------------------------------------------------------------
+    An error occurred during the execution of TVM.
+    For more information, please see: https://tvm.apache.org/docs/errors.html
+    ---------------------------------------------------------------
+      Check failed: (code == RPCCode::kReturn) is false: code=kShutdown
+
+    During handling of the above exception, another exception occurred:
 
     Traceback (most recent call last):
-      24: TVMFuncCall
-            at ../src/runtime/c_runtime_api.cc:477
-      23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      22: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      21: operator()
-            at ../include/tvm/runtime/packed_func.h:1730
-      20: unpack_call<tvm::IRModule, 5, tvm::<lambda(tvm::te::Schedule, const tvm::runtime::Array<tvm::runtime::ObjectRef>&, const tvm::runtime::String&, const tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer>&, bool)> >
-            at ../include/tvm/runtime/packed_func.h:1670
-      19: run<>
-            at ../include/tvm/runtime/packed_func.h:1630
-      18: run<tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      17: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      16: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      15: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      14: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1645
-      13: operator()
-            at ../src/driver/driver_api.cc:395
-      12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
-            at ../src/driver/driver_api.cc:381
-      11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
-            at ../src/driver/driver_api.cc:276
-      10: tvm::transform::Pass::operator()(tvm::IRModule) const
-            at ../src/ir/transform.cc:258
-      9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:451
-      7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/tir/ir/transform.cc:100
-      5: tvm::runtime::TypedPackedFunc<tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)>::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-            at ../include/tvm/runtime/packed_func.h:1749
-      4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher<tvm::tir::PrimFunc>::run<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::runtime::PackedFunc const&, tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&)
-            at ../include/tvm/runtime/packed_func.h:1693
-      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&) const
-            at ../include/tvm/runtime/packed_func.h:1617
-      2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      1: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      0: operator()
-            at ../src/runtime/c_runtime_api.cc:534
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
-        raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 16, 4]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 2, 128]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10439569
-    No: 5   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
-        res = future.result()
-      File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
-        return self.__get_result()
-      File "/usr/lib/python3.7/concurrent/futures/_base.py", line 384, in __get_result
-        raise self._exception
-      File "/usr/lib/python3.7/concurrent/futures/thread.py", line 57, in run
-        result = self.fn(*self.args, **self.kwargs)
-      File "/workspace/python/tvm/contrib/popen_pool.py", line 432, in <lambda>
-        worker = lambda *args: self._worker_run(*args)
-      File "/workspace/python/tvm/contrib/popen_pool.py", line 401, in _worker_run
-        return proc.recv()
-      File "/workspace/python/tvm/contrib/popen_pool.py", line 309, in recv
-        raise TimeoutError()
-    TimeoutError
-
-            [('tile_f', [-1, 8, 2, 16]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2949493
-    No: 6   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
-        func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
-        func = build(s, args, target_host=task.target_host, runtime=runtime)
-      File "/workspace/python/tvm/driver/build_module.py", line 227, in build
-        input_mod = lower(inputs, args, name=name, binds=binds)
-      File "/workspace/python/tvm/driver/build_module.py", line 134, in lower
-        return ffi.lower_schedule(inp, args, name, binds, simple_mode)
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 331, in tvm._ffi._cy3.core.PackedFuncBase.__call__
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 276, in tvm._ffi._cy3.core.FuncCall
-      File "tvm/_ffi/_cython/./base.pxi", line 181, in tvm._ffi._cy3.core.CHECK_CALL
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 706, in run_through_rpc
+        costs = time_f(*args).results
+      File "/usr/lib/python3.7/contextlib.py", line 130, in __exit__
+        self.gen.throw(type, value, traceback)
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 746, in __call__
+        remote.remove(build_result.filename)
+      File "/workspace/python/tvm/rpc/client.py", line 144, in remove
+        self._remote_funcs["remove"] = self.get_function("tvm.rpc.server.remove")
+      File "/workspace/python/tvm/rpc/client.py", line 72, in get_function
+        return self._sess.get_function(name)
+      File "/workspace/python/tvm/runtime/module.py", line 171, in get_function
+        self.handle, c_str(name), ctypes.c_int(query_imports), ctypes.byref(ret_handle)
+      File "/workspace/python/tvm/_ffi/base.py", line 348, in check_call
+        raise get_last_ffi_error()
     tvm._ffi.base.TVMError: Traceback (most recent call last):
-      24: TVMFuncCall
-            at ../src/runtime/c_runtime_api.cc:477
-      23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      22: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      21: operator()
-            at ../include/tvm/runtime/packed_func.h:1730
-      20: unpack_call<tvm::IRModule, 5, tvm::<lambda(tvm::te::Schedule, const tvm::runtime::Array<tvm::runtime::ObjectRef>&, const tvm::runtime::String&, const tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer>&, bool)> >
-            at ../include/tvm/runtime/packed_func.h:1670
-      19: run<>
-            at ../include/tvm/runtime/packed_func.h:1630
-      18: run<tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      17: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      16: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      15: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      14: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1645
-      13: operator()
-            at ../src/driver/driver_api.cc:395
-      12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
-            at ../src/driver/driver_api.cc:381
-      11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
-            at ../src/driver/driver_api.cc:276
-      10: tvm::transform::Pass::operator()(tvm::IRModule) const
-            at ../src/ir/transform.cc:258
-      9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:451
-      7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/tir/ir/transform.cc:100
-      5: tvm::runtime::TypedPackedFunc<tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)>::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-            at ../include/tvm/runtime/packed_func.h:1749
-      4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher<tvm::tir::PrimFunc>::run<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::runtime::PackedFunc const&, tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&)
-            at ../include/tvm/runtime/packed_func.h:1693
-      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&) const
+      52: 0xffffffffffffffff
+      51: _start
+      50: __libc_start_main
+      49: _Py_UnixMain
+      48: 0x0000000000650da0
+      47: 0x0000000000650afa
+      46: _PyFunction_FastCallDict
+      45: _PyEval_EvalCodeWithName
+      44: _PyEval_EvalFrameDefault
+      43: _PyFunction_FastCallKeywords
+      42: _PyEval_EvalCodeWithName
+      41: _PyEval_EvalFrameDefault
+      40: _PyMethodDef_RawFastCallKeywords
+      39: 0x0000000000546369
+      38: _PyEval_EvalCodeWithName
+      37: _PyEval_EvalFrameDefault
+      36: _PyFunction_FastCallKeywords
+      35: _PyEval_EvalCodeWithName
+      34: _PyEval_EvalFrameDefault
+      33: _PyFunction_FastCallDict
+      32: _PyEval_EvalCodeWithName
+      31: _PyEval_EvalFrameDefault
+      30: _PyObject_FastCallDict
+      29: 0x00000000004c06e1
+      28: _PyFunction_FastCallDict
+      27: _PyEval_EvalFrameDefault
+      26: _PyMethodDescr_FastCallKeywords
+      25: 0x00000000005dcb58
+      24: 0x00000000005dc83f
+      23: 0x00000000004ba127
+      22: _PyEval_EvalFrameDefault
+      21: _PyFunction_FastCallKeywords
+      20: _PyEval_EvalFrameDefault
+      19: _PyFunction_FastCallKeywords
+      18: _PyEval_EvalFrameDefault
+      17: _PyFunction_FastCallKeywords
+      16: _PyEval_EvalCodeWithName
+      15: _PyEval_EvalFrameDefault
+      14: 0x0000000000537c30
+      13: _PyObject_FastCallKeywords
+      12: 0x00007f713981bfa2
+      11: _ctypes_callproc
+      10: ffi_call
+      9: ffi_call_unix64
+      8: TVMModGetFunction
+            at ../src/runtime/c_runtime_api.cc:408
+      7: tvm::runtime::ModuleNode::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool)
+            at ../src/runtime/module.cc:66
+      6: tvm::runtime::RPCModuleNode::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::ObjectPtr<tvm::runtime::Object> const&)
+            at ../src/runtime/rpc/rpc_module.cc:185
+      5: tvm::runtime::RPCClientSession::GetFunction(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)
+            at ../src/runtime/rpc/rpc_endpoint.cc:1007
+      4: tvm::runtime::TVMRetValue tvm::runtime::RPCEndpoint::SysCallRemote<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>(tvm::runtime::RPCCode, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)
+            at ../src/runtime/rpc/rpc_endpoint.h:223
+      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>(int&&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) const
             at ../include/tvm/runtime/packed_func.h:1617
       2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
             at ../include/tvm/runtime/packed_func.h:1217
       1: Call
             at ../include/tvm/runtime/packed_func.h:1213
       0: operator()
-            at ../src/runtime/c_runtime_api.cc:534
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
-        raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
+            at ../src/runtime/rpc/rpc_endpoint.cc:684
+      File "../src/runtime/rpc/rpc_endpoint.cc", line 684
+    TVMError: 
+    ---------------------------------------------------------------
+    An error occurred during the execution of TVM.
+    For more information, please see: https://tvm.apache.org/docs/errors.html
+    ---------------------------------------------------------------
+      Check failed: (code == RPCCode::kReturn) is false: code=1
 
     Traceback (most recent call last):
-      24: TVMFuncCall
-            at ../src/runtime/c_runtime_api.cc:477
-      23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      22: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      21: operator()
-            at ../include/tvm/runtime/packed_func.h:1730
-      20: unpack_call<tvm::IRModule, 5, tvm::<lambda(tvm::te::Schedule, const tvm::runtime::Array<tvm::runtime::ObjectRef>&, const tvm::runtime::String&, const tvm::runtime::Map<tvm::te::Tensor, tvm::tir::Buffer>&, bool)> >
-            at ../include/tvm/runtime/packed_func.h:1670
-      19: run<>
-            at ../include/tvm/runtime/packed_func.h:1630
-      18: run<tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      17: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      16: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      15: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1630
-      14: run<tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_>
-            at ../include/tvm/runtime/packed_func.h:1645
-      13: operator()
-            at ../src/driver/driver_api.cc:395
-      12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array<tvm::runtime::ObjectRef, void> const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::unordered_map<tvm::te::Tensor, tvm::tir::Buffer, std::hash<tvm::te::Tensor>, std::equal_to<tvm::te::Tensor>, std::allocator<std::pair<tvm::te::Tensor const, tvm::tir::Buffer> > > const&, tvm::GlobalVarSupply, bool)
-            at ../src/driver/driver_api.cc:381
-      11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array<tvm::transform::Pass, void>)
-            at ../src/driver/driver_api.cc:276
-      10: tvm::transform::Pass::operator()(tvm::IRModule) const
-            at ../src/ir/transform.cc:258
-      9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:451
-      7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/ir/transform.cc:274
-      6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
-            at ../src/tir/ir/transform.cc:100
-      5: tvm::runtime::TypedPackedFunc<tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)>::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-            at ../include/tvm/runtime/packed_func.h:1749
-      4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher<tvm::tir::PrimFunc>::run<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::runtime::PackedFunc const&, tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&)
-            at ../include/tvm/runtime/packed_func.h:1693
-      3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()<tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext>(tvm::tir::PrimFunc&&, tvm::IRModule&&, tvm::transform::PassContext&&) const
-            at ../include/tvm/runtime/packed_func.h:1617
-      2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-            at ../include/tvm/runtime/packed_func.h:1217
-      1: Call
-            at ../include/tvm/runtime/packed_func.h:1213
-      0: operator()
-            at ../src/runtime/c_runtime_api.cc:534
-      File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
-      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
-        raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 2, 8]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 256, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2547744
-    No: 7   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+      52: 0xffffffffffffffff
+      51: _start
+      50: __libc_start_main
+      49: _Py_UnixMain
+      48: 0x0000000000650da0
+      47: 0x0000000000650afa
+      46: _PyFunction_FastCallDict
+      45: _PyEval_EvalCodeWithName
+      44: _PyEval_EvalFrameDefault
+      43: _PyFunction_FastCallKeywords
+      42: _PyEval_EvalCodeWithName
+      41: _PyEval_EvalFrameDefault
+      40: _PyMethodDef_RawFastCallKeywords
+      39: 0x0000000000546369
+      38: _PyEval_EvalCodeWithName
+      37: _PyEval_EvalFrameDefault
+      36: _PyFunction_FastCallKeywords
+      35: _PyEval_EvalCodeWithName
+      34: _PyEval_EvalFrameDefault
+      33: _PyFunction_FastCallDict
+      32: _PyEval_EvalCodeWithName
+      31: _PyEval_EvalFrameDefault
+      30: _PyObject_FastCallDict
+      29: 0x00000000004c06e1
+      28: _PyFunction_FastCallDict
+      27: _PyEval_EvalFrameDefault
+      26: _PyMethodDescr_FastCallKeywords
+      25: 0x00000000005dcb58
+      24: 0x00000000005dc83f
+      23: 0x00000000004ba127
+      22: _PyEval_EvalFrameDefault
+      21: _PyFunction_FastCallKeywords
+      20: _PyEval_EvalFrameDefault
+      19: _PyFunction_FastCall      [('tile_f', [-1, 16, 1, 2]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 2]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2359559
+    No: 4   GFLOPS: 115.75/115.75   result: MeasureResult(costs=(0.00200001922,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.2005980014801025, timestamp=1674053321.1974628)      [('tile_f', [-1, 2, 2, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7233831
+    No: 5   GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -901,8 +790,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 256, 1, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,8904508
-    No: 8   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 2, 8]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4367806
+    No: 6   GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1024,8 +913,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 64, 8]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 128, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6608963
-    No: 9   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 16, 8]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 256, 2]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9552559
+    No: 7   GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1147,8 +1036,9 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 4, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 1, 64]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9258557
-    No: 10  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 128]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 512]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,190511
+    No: 8   GFLOPS: 1.48/115.75     result: MeasureResult(costs=(0.15625212075,), error_no=MeasureErrorNo.NO_ERROR, all_cost=6.140629768371582, timestamp=1674053328.5444367)       [('tile_f', [-1, 64, 1, 8]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 1]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7938182
+    No: 9   GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1270,8 +1160,26 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 4, 4]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 64, 8]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9603335
-    No: 11  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 1, 512]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 2, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3824919
+    No: 10  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
+      File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 142, in build
+        res = future.result()
+      File "/usr/lib/python3.7/concurrent/futures/_base.py", line 435, in result
+        return self.__get_result()
+      File "/usr/lib/python3.7/concurrent/futures/_base.py", line 384, in __get_result
+        raise self._exception
+      File "/usr/lib/python3.7/concurrent/futures/thread.py", line 57, in run
+        result = self.fn(*self.args, **self.kwargs)
+      File "/workspace/python/tvm/contrib/popen_pool.py", line 432, in <lambda>
+        worker = lambda *args: self._worker_run(*args)
+      File "/workspace/python/tvm/contrib/popen_pool.py", line 401, in _worker_run
+        return proc.recv()
+      File "/workspace/python/tvm/contrib/popen_pool.py", line 309, in recv
+        raise TimeoutError()
+    TimeoutError
+
+            [('tile_f', [-1, 8, 4, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 1, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9823242
+    No: 11  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1393,8 +1301,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 16, 4]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2587768
-    No: 12  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 16, 2, 8]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 2]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2754107
+    No: 12  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1516,8 +1424,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 4, 32]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 32]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 0)],None,736315
-    No: 13  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 1, 32]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 2, 64]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5779367
+    No: 13  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1639,8 +1547,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 2, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,10044555
-    No: 14  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 4, 8]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 7]), ('tile_rc', [-1, 16, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7827752
+    No: 14  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1762,8 +1670,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 1, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 8, 32]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6348485
-    No: 15  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 1, 2, 8]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 16, 32]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,5578243
+    No: 15  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -1885,8 +1793,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 4, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 4, 4]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7624397
-    No: 16  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 2, 4]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 16]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3809631
+    No: 16  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -2008,8 +1916,9 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 2, 64]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 4, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,4848126
-    No: 17  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 8, 1, 32]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 1, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,1913088
+    No: 17  GFLOPS: 40.72/115.75    result: MeasureResult(costs=(0.005685248041666667,), error_no=MeasureErrorNo.NO_ERROR, all_cost=4.482030391693115, timestamp=1674053344.7685432)        [('tile_f', [-1, 8, 1, 2]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 2, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2588578
+    No: 18  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -2131,8 +2040,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 1, 16]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 512, 1]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7195926
-    No: 18  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 2, 4, 16]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 1, 128]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9659496
+    No: 19  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -2254,8 +2163,8 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 4, 8, 2]), ('tile_y', [-1, 1, 7, 1]), ('tile_x', [-1, 7, 1, 1]), ('tile_rc', [-1, 16, 8]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 0)],None,2240121
-    No: 19  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 256, 1, 1]), ('tile_y', [-1, 1, 1, 7]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 64, 8]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 0)],None,3795228
+    No: 20  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 592, in __call__
         func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 544, in _build_func_common
@@ -2377,8 +2286,7 @@ for this template
       File "tvm/_ffi/_cython/./packed_func.pxi", line 56, in tvm._ffi._cy3.core.tvm_callback
       File "/workspace/python/tvm/autotvm/measure/measure_methods.py", line 875, in verify_pass
         raise InstantiationError("Skipped because of invalid gpu kernel")
-    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 32, 4, 4]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 8, 64]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 1500), ('unroll_explicit', 1)],None,9268500
-    No: 20  GFLOPS: 234.57/234.57   result: MeasureResult(costs=(0.0009869387213114753,), error_no=MeasureErrorNo.NO_ERROR, all_cost=4.453100681304932, timestamp=1674022621.5645502)       [('tile_f', [-1, 1, 4, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7851432
+    tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [('tile_f', [-1, 256, 1, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 16, 4]), ('tile_ry', [-1, 1, 3]), ('tile_rx', [-1, 1, 3]), ('auto_unroll_max_step', 0), ('unroll_explicit', 1)],None,6857188
 
 
 
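Each ``No: N`` line in the log above is one autotvm trial: a sampled config tuple paired with a ``MeasureResult`` carrying the measured costs, an error code, and timestamps. As a minimal sketch of post-processing such a log on the host (assuming a record file ``conv2d.log`` written by the run above; the file name is illustrative), the records can be filtered like this:

.. code-block:: python

    import numpy as np
    from tvm import autotvm

    best_gflops, best_config = 0.0, None
    for inp, res in autotvm.record.load_from_file("conv2d.log"):
        # skip failed trials (InstantiationError, TimeoutError, RPC errors) like those above
        if res.error_no != 0:
            continue
        gflops = inp.task.flop / np.mean(res.costs) / 1e9
        if gflops > best_gflops:
            best_gflops, best_config = gflops, inp.config
    print(best_gflops, best_config)
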
@@ -2433,9 +2341,9 @@ and measure running time.
     Finish loading 20 records
 
     Best config:
-    [('tile_f', [-1, 1, 4, 2]), ('tile_y', [-1, 1, 1, 1]), ('tile_x', [-1, 1, 7, 1]), ('tile_rc', [-1, 8, 8]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 3, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7851432
+    [('tile_f', [-1, 2, 2, 1]), ('tile_y', [-1, 7, 1, 1]), ('tile_x', [-1, 1, 1, 1]), ('tile_rc', [-1, 2, 4]), ('tile_ry', [-1, 3, 1]), ('tile_rx', [-1, 1, 1]), ('auto_unroll_max_step', 512), ('unroll_explicit', 1)],None,7233831
     Finish loading 20 records
-    Time cost of this operator: 0.001165
+    Time cost of this operator: 0.002476
 
 
 
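The "Best config" and "Time cost of this operator" lines come from replaying the best record: compilation is wrapped in ``apply_history_best`` and the built kernel is timed. A minimal sketch under the tutorial's assumptions (the template function name and the shape variables follow the tutorial and are not defined here; the log name is illustrative):

.. code-block:: python

    import tvm
    from tvm import autotvm

    with autotvm.apply_history_best("conv2d.log"):
        with tvm.target.Target("cuda"):
            s, arg_bufs = conv2d_no_batchnorm(N, H, W, CO, CI, KH, KW, strides, padding)
            func = tvm.build(s, arg_bufs)

    dev = tvm.cuda(0)
    evaluator = func.time_evaluator(func.entry_name, dev, number=400)
    # calling evaluator on tvm.nd.array inputs of the right shapes yields the
    # mean cost reported as "Time cost of this operator" above
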
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
index bf76d56af5..447ad7a895 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_autotune.rst.txt
@@ -363,10 +363,10 @@ Timing the untuned program
     ########## Build without Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  308.2     98.702   (1, 2, 10, 10, 3)  2       1        [308.2]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.09      0.99     (1, 6, 10, 10)     1       1        [3.09]            
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.965     0.309    (1, 1, 10, 10, 3)  1       1        [0.965]           
-    Total_time                                    -                                             312.254   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  310.8     98.702   (1, 2, 10, 10, 3)  2       1        [310.8]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.088     0.981    (1, 6, 10, 10)     1       1        [3.088]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.998     0.317    (1, 1, 10, 10, 3)  1       1        [0.998]           
+    Total_time                                    -                                             314.886   -        -                  -       -        -                 
 
 
 
@@ -431,10 +431,10 @@ Timing the tuned program
     ########## Build with Autotuning ##########
     Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)  
     ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------  
-    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  100.1     97.36    (1, 6, 10, 10, 1)  2       1        [100.1]           
-    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.75      1.703    (1, 6, 10, 10)     1       1        [1.75]            
-    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.964     0.938    (1, 1, 10, 10, 3)  1       1        [0.964]           
-    Total_time                                    -                                             102.814   -        -                  -       -        -                 
+    tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  100.2     97.306   (1, 6, 10, 10, 1)  2       1        [100.2]           
+    tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.779     1.727    (1, 6, 10, 10)     1       1        [1.779]           
+    tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.996     0.967    (1, 1, 10, 10, 3)  1       1        [0.996]           
+    Total_time                                    -                                             102.975   -        -                  -       -        -                 
 
 
 
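The per-node tables above ("Node Name", "Time(us)", "Time(%)") come from a debug build of the executor that times each fused operator individually. A hedged host-side sketch of the same idea, assuming ``graph_json``, ``lib``, and ``dev`` from a prior ``relay.build`` (the microTVM tutorial routes this through its own device session instead):

.. code-block:: python

    from tvm.contrib.debugger import debug_executor

    # runs each node and prints a per-operator time breakdown like the tables above
    m = debug_executor.create(graph_json, lib, dev)
    m.run()
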
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt
index 9523bbadfd..f2bdaa4dc8 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_pytorch.rst.txt
@@ -117,7 +117,7 @@ download a cat image and preprocess it to use as the model input.
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torch/ao/quantization/utils.py:281: UserWarning: must run observer before calling calculate_qparams. Returning default values.
       "must run observer before calling calculate_qparams. " +
     Downloading: "https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth" to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2_qnnpack_37f702c5.pth
-
      0%|          | 0.00/3.42M [00:00<?, ?B/s]
    100%|##########| 3.42M/3.42M [00:00<00:00, 71.2MB/s]
+
      0%|          | 0.00/3.42M [00:00<?, ?B/s]
    100%|##########| 3.42M/3.42M [00:00<00:00, 108MB/s]
     /workspace/python/tvm/relay/frontend/pytorch_utils.py:47: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
       return LooseVersion(torch_ver) > ver
     /venv/apache-tvm-py3.7/lib/python3.7/site-packages/setuptools/_distutils/version.py:346: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
@@ -322,7 +322,7 @@ Look up prediction top 1 index in 1000 class synset.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  7.816 seconds)
+   **Total running time of the script:** ( 1 minutes  10.386 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_pytorch.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
index fd5dc458fb..eea9c51115 100644
--- a/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/micro_train.rst.txt
@@ -218,7 +218,7 @@ take about **2 minutes** to download the Stanford Cars, while COCO 2017 validati
  .. code-block:: none
 
 
-    '/tmp/tmpdfofimzq/images/random'
+    '/tmp/tmp1dvacfup/images/random'
 
 
 
@@ -318,8 +318,8 @@ objects to other stuff? We can display some examples from our datasets using ``m
 
  .. code-block:: none
 
-    /tmp/tmpdfofimzq/images/target contains 8144 images
-    /tmp/tmpdfofimzq/images/random contains 5000 images
+    /tmp/tmp1dvacfup/images/target contains 8144 images
+    /tmp/tmp1dvacfup/images/random contains 5000 images
 
 
 
@@ -494,13 +494,13 @@ the time on our validation set).
  .. code-block:: none
 
     Epoch 1/3
-    328/328 - 47s - loss: 0.2342 - accuracy: 0.9197 - val_loss: 0.1264 - val_accuracy: 0.9532 - 47s/epoch - 143ms/step
+    328/328 - 47s - loss: 0.2087 - accuracy: 0.9283 - val_loss: 0.2082 - val_accuracy: 0.9335 - 47s/epoch - 144ms/step
     Epoch 2/3
-    328/328 - 43s - loss: 0.1031 - accuracy: 0.9623 - val_loss: 0.1160 - val_accuracy: 0.9532 - 43s/epoch - 131ms/step
+    328/328 - 44s - loss: 0.0912 - accuracy: 0.9667 - val_loss: 0.1188 - val_accuracy: 0.9585 - 44s/epoch - 133ms/step
     Epoch 3/3
-    328/328 - 43s - loss: 0.0653 - accuracy: 0.9745 - val_loss: 0.1150 - val_accuracy: 0.9600 - 43s/epoch - 131ms/step
+    328/328 - 43s - loss: 0.0671 - accuracy: 0.9753 - val_loss: 0.1070 - val_accuracy: 0.9645 - 43s/epoch - 132ms/step
 
-    <keras.callbacks.History object at 0x7f2087634f90>
+    <keras.callbacks.History object at 0x7f3059d59450>
 
 
 
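The three epoch lines above are ordinary Keras progress output. A sketch of the call that produces them, assuming the ``model`` and the training/validation datasets defined earlier in the tutorial:

.. code-block:: python

    history = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        epochs=3,
        verbose=2,  # one summary line per epoch, as in the log above
    )
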
@@ -857,7 +857,7 @@ Arduino tutorial for how to do that `on GitHub <https://github.com/guberti/tvm-a
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 4 minutes  45.460 seconds)
+   **Total running time of the script:** ( 4 minutes  42.715 seconds)
 
 
 .. _sphx_glr_download_how_to_work_with_microtvm_micro_train.py:
diff --git a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
index 83e3ae9702..65f5a04135 100644
--- a/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_microtvm/sg_execution_times.rst.txt
@@ -5,18 +5,18 @@
 
 Computation times
 =================
-**06:55.995** total execution time for **how_to_work_with_microtvm** files:
+**06:58.575** total execution time for **how_to_work_with_microtvm** files:
 
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 04:45.460 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_train.py` (``micro_train.py``)               | 04:42.715 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_pytorch.py` (``micro_pytorch.py``)           | 01:07.816 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_pytorch.py` (``micro_pytorch.py``)           | 01:10.386 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:50.563 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_autotune.py` (``micro_autotune.py``)         | 00:52.417 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_aot.py` (``micro_aot.py``)                   | 00:08.439 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_aot.py` (``micro_aot.py``)                   | 00:09.135 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.716 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_microtvm_micro_tflite.py` (``micro_tflite.py``)             | 00:03.923 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_microtvm_micro_ethosu.py` (``micro_ethosu.py``)             | 00:00.000 | 0.0 MB |
 +---------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
index 91d4bd53ca..660b5b707c 100644
--- a/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_relay/sg_execution_times.rst.txt
@@ -5,14 +5,14 @@
 
 Computation times
 =================
-**00:43.054** total execution time for **how_to_work_with_relay** files:
+**00:44.712** total execution time for **how_to_work_with_relay** files:
 
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_pipeline_executor.py` (``using_pipeline_executor.py``) | 00:31.577 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_pipeline_executor.py` (``using_pipeline_executor.py``) | 00:32.703 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)           | 00:09.963 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_using_external_lib.py` (``using_external_lib.py``)           | 00:10.408 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                             | 00:01.508 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_relay_build_gcn.py` (``build_gcn.py``)                             | 00:01.595 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_relay_using_relay_viz.py` (``using_relay_viz.py``)                 | 00:00.006 | 0.0 MB |
 +----------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
index 320b394aad..8c62bfe8f3 100644
--- a/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/intrin_math.rst.txt
@@ -264,7 +264,7 @@ The following example customizes the CUDA lowering rule for :code:`exp`.
  .. code-block:: none
 
 
-    <function my_cuda_math_rule at 0x7f2087e5d9e0>
+    <function my_cuda_math_rule at 0x7f305a4eb560>
 
 
 
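The printed value above is the rule function itself, returned by the registration call. A trimmed sketch of such a registration, following the tutorial's approach (the helper import from ``tvm.target`` and the exact dispatch logic are assumptions here):

.. code-block:: python

    import tvm
    from tvm.target import register_intrin_lowering

    def my_cuda_math_rule(op):
        # dispatch tir.exp on float32 to the CUDA intrinsic expf; leave others unchanged
        if op.dtype == "float32":
            return tvm.tir.call_pure_extern("float32", "expf", op.args[0])
        return op

    register_intrin_lowering("tir.exp", target="cuda", f=my_cuda_math_rule, override=True)
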
diff --git a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
index c48c5b2b2c..cbe08c557e 100644
--- a/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/sg_execution_times.rst.txt
@@ -5,22 +5,22 @@
 
 Computation times
 =================
-**00:04.522** total execution time for **how_to_work_with_schedules** files:
+**00:07.862** total execution time for **how_to_work_with_schedules** files:
 
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:02.165 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_intrin_math.py` (``intrin_math.py``)                 | 00:05.261 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:01.088 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tensorize.py` (``tensorize.py``)                     | 00:01.222 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.534 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_reduction.py` (``reduction.py``)                     | 00:00.583 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.515 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_scan.py` (``scan.py``)                               | 00:00.571 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.116 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_extern_op.py` (``extern_op.py``)                     | 00:00.118 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.049 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_schedule_primitives.py` (``schedule_primitives.py``) | 00:00.051 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_how_to_work_with_schedules_tedd.py` (``tedd.py``)                               | 00:00.031 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)               | 00:00.023 | 0.0 MB |
+| :ref:`sphx_glr_how_to_work_with_schedules_tuple_inputs.py` (``tuple_inputs.py``)               | 00:00.024 | 0.0 MB |
 +------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
index 05280171c9..6eeee7d3d1 100644
--- a/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
+++ b/docs/_sources/how_to/work_with_schedules/tensorize.rst.txt
@@ -340,7 +340,7 @@ The import needs to happen before the tensorized GEMV is executed.
             B_1 = T.match_buffer(B, (512, 64))
             C_1 = T.match_buffer(C, (1024, 512))
             i = T.var("int32")
-            T.attr(T.iter_var(i, None, "DataPar", ""), "pragma_import_llvm", "; ModuleID = '/tmp/tmpa__vx31f/input0.cc'\nsource_filename = \"/tmp/tmpa__vx31f/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca float*, [...]
+            T.attr(T.iter_var(i, None, "DataPar", ""), "pragma_import_llvm", "; ModuleID = '/tmp/tmpk1_7pted/input0.cc'\nsource_filename = \"/tmp/tmpk1_7pted/input0.cc\"\ntarget datalayout = \"e-m:e-i64:64-f80:128-n8:16:32:64-S128\"\ntarget triple = \"x86_64-pc-linux-gnu\"\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca float*, align 8\n  %8 = alloca float*, align 8\n  %9 = alloca float*, [...]
             for i, j_outer in T.grid(1024, 32):
                 T.call_extern("int32", "gemv_update", T.tvm_access_ptr(T.type_annotation("float32"), C_1.data, i * 512 + j_outer * 16, 16, 2), T.tvm_access_ptr(T.type_annotation("float32"), A_1.data, i * 64, 64, 1), T.tvm_access_ptr(T.type_annotation("float32"), B_1.data, j_outer * 1024, 1024, 1), 16, 64, 64)
 
diff --git a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
index 7bc70c78a2..aae6e6abda 100644
--- a/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/autotvm/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:29.221** total execution time for **topic_vta_tutorials_autotvm** files:
+**00:30.161** total execution time for **topic_vta_tutorials_autotvm** files:
 
 +---------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:29.215 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_relay_vta.py` (``tune_relay_vta.py``) | 00:30.154 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_topic_vta_tutorials_autotvm_tune_alu_vta.py` (``tune_alu_vta.py``)     | 00:00.007 | 0.0 MB |
 +---------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
index 271646101c..f2b9464216 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_classification.rst.txt
@@ -293,7 +293,7 @@ The compilation steps are:
       DeprecationWarning,
     /workspace/vta/tutorials/frontend/deploy_classification.py:213: DeprecationWarning: legacy graph executor behavior of producing json / lib / params will be removed in the next release. Please see documents of tvm.contrib.graph_executor.GraphModule for the  new recommended usage.
       relay_prog, target=tvm.target.Target(target, host=env.target_host), params=params
-    resnet18_v1 inference graph built in 31.10s!
+    resnet18_v1 inference graph built in 32.45s!
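
The timing above comes from the legacy-style build flagged by the warning; approximately (a sketch assuming `relay_prog`, `params`, `target`, and `env` as prepared earlier in the tutorial):

.. code-block:: python

    import tvm
    import vta
    from tvm import relay

    # build the quantized graph for VTA, keeping layouts untouched for the accelerator
    with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
        graph, lib, params = relay.build(
            relay_prog,
            target=tvm.target.Target(target, host=env.target_host),
            params=params,
        )
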
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
index 18e0adc26a..e8eda5f256 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/deploy_detection.rst.txt
@@ -337,7 +337,7 @@ The compilation steps are:
 
     /workspace/python/tvm/relay/build_module.py:348: DeprecationWarning: Please use input parameter mod (tvm.IRModule) instead of deprecated parameter mod (tvm.relay.function.Function)
       DeprecationWarning,
-    yolov3-tiny inference graph built in 21.25s!
+    yolov3-tiny inference graph built in 22.21s!
 
 
 
diff --git a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
index 7b430e89da..f30926309e 100644
--- a/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/frontend/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**01:35.883** total execution time for **topic_vta_tutorials_frontend** files:
+**01:38.836** total execution time for **topic_vta_tutorials_frontend** files:
 
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:48.072 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_detection.py` (``deploy_detection.py``)           | 00:49.492 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:47.811 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_frontend_deploy_classification.py` (``deploy_classification.py``) | 00:49.344 | 0.0 MB |
 +------------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
index 529f62772c..f9588a3460 100644
--- a/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/optimize/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:03.075** total execution time for **topic_vta_tutorials_optimize** files:
+**00:03.145** total execution time for **topic_vta_tutorials_optimize** files:
 
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.657 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_convolution_opt.py` (``convolution_opt.py``)         | 00:02.681 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.418 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_optimize_matrix_multiply_opt.py` (``matrix_multiply_opt.py``) | 00:00.464 | 0.0 MB |
 +--------------------------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
index f9b6b55da1..8f9e65a790 100644
--- a/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
+++ b/docs/_sources/topic/vta/tutorials/sg_execution_times.rst.txt
@@ -5,10 +5,10 @@
 
 Computation times
 =================
-**00:00.777** total execution time for **topic_vta_tutorials** files:
+**00:00.841** total execution time for **topic_vta_tutorials** files:
 
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.427 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_matrix_multiply.py` (``matrix_multiply.py``) | 00:00.443 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.349 | 0.0 MB |
+| :ref:`sphx_glr_topic_vta_tutorials_vta_get_started.py` (``vta_get_started.py``) | 00:00.398 | 0.0 MB |
 +---------------------------------------------------------------------------------+-----------+--------+
diff --git a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
index cf50be6d5d..1ba9fd2c01 100644
--- a/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/auto_scheduler_matmul_x86.rst.txt
@@ -207,13 +207,6 @@ trials, we can load the best schedule from the log file and apply it.
 
 
 
-.. rst-class:: sphx-glr-script-out
-
- .. code-block:: none
-
-    .T
-
-
 
 
 
@@ -326,7 +319,7 @@ We build the binary and check its correctness and performance.
 
  .. code-block:: none
 
-    Execution time of this operator: 94.901 ms
+    Execution time of this operator: 93.712 ms
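
The measurement above is obtained by applying the best record from the search log and timing the built operator; a condensed sketch of that step (assuming `task`, `log_file`, and `target` as defined earlier in the tutorial; the 1024-square shapes follow its matmul-add workload):

.. code-block:: python

    import numpy as np
    import tvm

    # load the best schedule found during the search and build it
    sch, args = task.apply_best(log_file)
    func = tvm.build(sch, args, target)

    a_np = np.random.uniform(size=(1024, 1024)).astype(np.float32)
    b_np = np.random.uniform(size=(1024, 1024)).astype(np.float32)
    c_np = np.random.uniform(size=(1024, 1024)).astype(np.float32)
    dev = tvm.cpu()
    a_tvm = tvm.nd.array(a_np, device=dev)
    b_tvm = tvm.nd.array(b_np, device=dev)
    c_tvm = tvm.nd.array(c_np, device=dev)
    out_tvm = tvm.nd.empty(a_np.shape, device=dev)

    # time the compiled function; the median over repeated runs is reported
    evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
    print("Execution time of this operator: %.3f ms"
          % (np.median(evaluator(a_tvm, b_tvm, c_tvm, out_tvm).results) * 1000))
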
 
 
 
@@ -444,7 +437,7 @@ operations.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 1 minutes  29.212 seconds)
+   **Total running time of the script:** ( 1 minutes  11.740 seconds)
 
 
 .. _sphx_glr_download_tutorial_auto_scheduler_matmul_x86.py:
diff --git a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
index 6edb23bd3c..223332f6ef 100644
--- a/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_matmul_x86.rst.txt
@@ -454,16 +454,16 @@ reduce variance, we take 5 measurements and average them.
     waiting for device...
     device available
     Get devices for measurement successfully!
-    No: 1   GFLOPS: 2.42/2.42       result: MeasureResult(costs=(0.11078919699999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.0058329105377197, timestamp=1674021106.6407683)        [('tile_y', [-1, 2]), ('tile_x', [-1, 4])],None,21
-    No: 2   GFLOPS: 12.39/12.39     result: MeasureResult(costs=(0.0216594874,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5815625190734863, timestamp=1674021107.9928033)       [('tile_y', [-1, 2]), ('tile_x', [-1, 512])],None,91
-    No: 3   GFLOPS: 1.79/12.39      result: MeasureResult(costs=(0.1502231728,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.643393039703369, timestamp=1674021111.4178398)        [('tile_y', [-1, 512]), ('tile_x', [-1, 4])],None,29
-    No: 4   GFLOPS: 2.03/12.39      result: MeasureResult(costs=(0.1321356942,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.333951711654663, timestamp=1674021113.7726407)        [('tile_y', [-1, 256]), ('tile_x', [-1, 4])],None,28
-    No: 5   GFLOPS: 10.96/12.39     result: MeasureResult(costs=(0.024493366199999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.619408369064331, timestamp=1674021114.6219072)        [('tile_y', [-1, 1]), ('tile_x', [-1, 512])],None,90
-    No: 6   GFLOPS: 3.27/12.39      result: MeasureResult(costs=(0.08203405239999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.5492236614227295, timestamp=1674021116.1835446)        [('tile_y', [-1, 32]), ('tile_x', [-1, 8])],None,35
-    No: 7   GFLOPS: 12.80/12.80     result: MeasureResult(costs=(0.020977112399999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6758532524108887, timestamp=1674021117.5373123)       [('tile_y', [-1, 64]), ('tile_x', [-1, 128])],None,76
-    No: 8   GFLOPS: 12.95/12.95     result: MeasureResult(costs=(0.020725559400000003,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5682404041290283, timestamp=1674021118.123468)        [('tile_y', [-1, 128]), ('tile_x', [-1, 512])],None,97
-    No: 9   GFLOPS: 13.87/13.87     result: MeasureResult(costs=(0.019353842399999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.534494161605835, timestamp=1674021118.773921) [('tile_y', [-1, 128]), ('tile_x', [-1, 64])],None,67
-    No: 10  GFLOPS: 0.90/13.87      result: MeasureResult(costs=(0.2990334894,), error_no=MeasureErrorNo.NO_ERROR, all_cost=4.992733716964722, timestamp=1674021123.8084934)        [('tile_y', [-1, 128]), ('tile_x', [-1, 2])],None,17
+    No: 1   GFLOPS: 9.62/9.62       result: MeasureResult(costs=(0.027898637000000004,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.7480354309082031, timestamp=1674051804.3226438)       [('tile_y', [-1, 512]), ('tile_x', [-1, 64])],None,69
+    No: 2   GFLOPS: 13.01/13.01     result: MeasureResult(costs=(0.020628920199999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.6848492622375488, timestamp=1674051804.9134333)       [('tile_y', [-1, 64]), ('tile_x', [-1, 128])],None,76
+    No: 3   GFLOPS: 12.70/13.01     result: MeasureResult(costs=(0.0211327958,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.5699365139007568, timestamp=1674051806.2736955)       [('tile_y', [-1, 32]), ('tile_x', [-1, 512])],None,95
+    No: 4   GFLOPS: 0.87/13.01      result: MeasureResult(costs=(0.3092539082,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.162877321243286, timestamp=1674051812.2449753)        [('tile_y', [-1, 512]), ('tile_x', [-1, 2])],None,19
+    No: 5   GFLOPS: 10.24/13.01     result: MeasureResult(costs=(0.026225929999999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.7040507793426514, timestamp=1674051813.0622177)       [('tile_y', [-1, 8]), ('tile_x', [-1, 64])],None,63
+    No: 6   GFLOPS: 2.57/13.01      result: MeasureResult(costs=(0.104494733,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.9170234203338623, timestamp=1674051814.9868991)        [('tile_y', [-1, 8]), ('tile_x', [-1, 4])],None,23
+    No: 7   GFLOPS: 2.14/13.01      result: MeasureResult(costs=(0.12555924840000002,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.2493879795074463, timestamp=1674051818.0408082)        [('tile_y', [-1, 1]), ('tile_x', [-1, 8])],None,30
+    No: 8   GFLOPS: 9.90/13.01      result: MeasureResult(costs=(0.027107054799999995,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.663581371307373, timestamp=1674051818.7291815)        [('tile_y', [-1, 1]), ('tile_x', [-1, 256])],None,80
+    No: 9   GFLOPS: 2.06/13.01      result: MeasureResult(costs=(0.13019473139999999,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.2868573665618896, timestamp=1674051821.1410513)        [('tile_y', [-1, 4]), ('tile_x', [-1, 2])],None,12
+    No: 10  GFLOPS: 3.26/13.01      result: MeasureResult(costs=(0.0823628416,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.5267679691314697, timestamp=1674051822.707896)        [('tile_y', [-1, 32]), ('tile_x', [-1, 8])],None,35
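
Each `No:` row above is one trial from a tuner configured roughly as follows (per the tutorial; the `LocalRunner` averages five measurements per candidate, and `task` is assumed from the template instantiation):

.. code-block:: python

    from tvm import autotvm

    measure_option = autotvm.measure_option(
        builder="local",
        runner=autotvm.LocalRunner(number=5),  # 5 measurements per candidate, averaged
    )

    tuner = autotvm.tuner.RandomTuner(task)
    tuner.tune(
        n_trial=10,
        measure_option=measure_option,
        callbacks=[autotvm.callback.log_to_file("matmul.log")],
    )
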
 
 
 
diff --git a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
index 6a1cad13f4..082f108426 100644
--- a/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
+++ b/docs/_sources/tutorial/autotvm_relay_x86.rst.txt
@@ -311,7 +311,7 @@ standard deviation.
 
  .. code-block:: none
 
-    {'mean': 514.1060797499813, 'median': 513.3606129998952, 'std': 2.953015519281599}
+    {'mean': 514.1226119299996, 'median': 514.5901267999989, 'std': 2.544468258735559}
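
The statistics dictionary above is computed from repeated `timeit` runs of the compiled module; a minimal sketch (with `module` the graph executor instance assumed from the previous step):

.. code-block:: python

    import timeit
    import numpy as np

    timing_number = 10
    timing_repeat = 10
    timings = (
        np.array(
            timeit.Timer(lambda: module.run()).repeat(repeat=timing_repeat, number=timing_number)
        )
        * 1000 / timing_number  # milliseconds per run
    )
    unoptimized = {"mean": np.mean(timings), "median": np.median(timings), "std": np.std(timings)}
    print(unoptimized)
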
 
 
 
@@ -545,30 +545,30 @@ the tuning data to.
 
  .. code-block:: none
 
-
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:    4.27/  15.23 GFLOPS | Progress: (4/20) | 9.22 s
    [Task  1/25]  Current/Best:   22.56/  22.56 GFLOPS | Progress: (8/20) | 12.15 s
    [Task  1/25]  Current/Best:   12.68/  22.56 GFLOPS | Progress: (12/20) | 18.40 s
    [Task  1/25]  Current/Best:    6.08/  22.56 GFLOPS | Progress: (16/20) | 20.88 s
    [Task  1/25]  Current/Best:    8.98/  22.56 GFLOPS | Progress: (20/20) | 24.44 s Done.
-
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   15.23/  15.23 GFLOPS | Progress: (4/20) | 3.53 s
    [Task  2/25]  Current/Best:   16.97/  19.37 GFLOPS | Progress: (8/20) | 4.94 s
    [Task  2/25]  Current/Best:    9.78/  19.37 GFLOPS | Progress: (12/20) | 6.80 s
    [Task  2/25]  Current/Best:   18.50/  19.58 GFLOPS | Progress: (16/20) | 8.74 s
    [Task  2/25]  Current/Best:    7.74/  19.85 GFLOPS | Progress: (20/20) | 10.81 s Done.
-
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:   13.17/  19.26 GFLOPS | Progress: (4/20) | 3.89 s
    [Task  3/25]  Current/Best:   19.11/  20.00 GFLOPS | Progress: (8/20) | 6.03 s
    [Task  3/25]  Current/Best:    9.24/  20.00 GFLOPS | Progress: (12/20) | 9.95 s
    [Task  3/25]  Current/Best:   12.87/  20.00 GFLOPS | Progress: (16/20) | 12.69 s
    [Task  3/25]  Current/Best:   16.98/  21.75 GFLOPS | Progress: (20/20) | 14.77 s Done.
-
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:    5.19/  19.85 GFLOPS | Progress: (4/20) | 4.17 s
    [Task  4/25]  Current/Best:   15.63/  19.85 GFLOPS | Progress: (8/20) | 8.71 s
    [Task  4/25]  Current/Best:   12.37/  19.85 GFLOPS | Progress: (12/20) | 11.14 s
    [Task  4/25]  Current/Best:   16.38/  19.85 GFLOPS | Progress: (16/20) | 12.94 s
    [Task  4/25]  Current/Best:   19.80/  19.85 GFLOPS | Progress: (20/20) | 14.43 s Done.
-
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  5/25]  Current/Best:   22.50/  22.50 GFLOPS | Progress: (4/20) | 3.95 s
    [Task  5/25]  Current/Best:   16.33/  22.50 GFLOPS | Progress: (8/20) | 6.33 s
    [Task  5/25]  Current/Best:   12.03/  22.50 GFLOPS | Progress: (12/20) | 9.83 s
    [Task  5/25]  Current/Best:   12.40/  22.50 GFLOPS | Progress: (16/20) | 11.82 s
    [Task  5/25]  Current/Best:   17.06/  22.50 GFLOPS | Progress: (20/20) | 13.74 s Done.
-
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   13.10/  15.26 GFLOPS | Progress: (4/20) | 4.62 s
    [Task  6/25]  Current/Best:    3.72/  15.26 GFLOPS | Progress: (8/20) | 8.09 s
    [Task  6/25]  Current/Best:    2.90/  15.45 GFLOPS | Progress: (12/20) | 16.20 s
    [Task  6/25]  Current/Best:    2.62/  15.45 GFLOPS | Progress: (16/20) | 19.95 s
    [Task  6/25]  Current/Best:    5.76/  15.45 GFLOPS | Progress: (20/20) | 24.56 s Done.
-
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   18.07/  21.63 GFLOPS | Progress: (4/20) | 3.93 s
    [Task  7/25]  Current/Best:   19.61/  21.63 GFLOPS | Progress: (8/20) | 6.36 s
    [Task  7/25]  Current/Best:   15.27/  21.63 GFLOPS | Progress: (12/20) | 10.01 s
    [Task  7/25]  Current/Best:   19.25/  21.63 GFLOPS | Progress: (16/20) | 12.55 s
    [Task  7/25]  Current/Best:   15.44/  21.63 GFLOPS | Progress: (20/20) | 14.86 s Done.
-
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   13.99/  13.99 GFLOPS | Progress: (4/20) | 7.20 s
    [Task  8/25]  Current/Best:   11.36/  13.99 GFLOPS | Progress: (8/20) | 19.00 s
    [Task  8/25]  Current/Best:    9.32/  13.99 GFLOPS | Progress: (12/20) | 28.01 s
    [Task  8/25]  Current/Best:   12.92/  14.32 GFLOPS | Progress: (16/20) | 31.44 s
    [Task  8/25]  Current/Best:   14.42/  14.42 GFLOPS | Progress: (20/20) | 34.09 s
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:   10.72/  21.23 GFLOPS | Progress: (4/20) | 4.49 s
    [Task  9/25]  Current/Best:   11.52/  21.23 GFLOPS | Progress: (8/20) | 7.21 s
    [Task  9/25]  Current/Best:   11.32/  21.23 GFLOPS | Progress: (12/20) | 10.40 s
    [Task  9/25]  Current/Best:   10.15/  21.23 GFLOPS | Progress: (16/20) | 13.38 s
   [Task  9/25]  Current/Best:   21.39/  21.39 GFLOPS | Progress: (20/20) | 19.26 s Done.
-
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 10/25]  Current/Best:    9.11/  20.57 GFLOPS | Progress: (4/20) | 3.59 s
    [Task 10/25]  Current/Best:   11.82/  20.57 GFLOPS | Progress: (8/20) | 5.92 s
    [Task 10/25]  Current/Best:    9.54/  20.57 GFLOPS | Progress: (12/20) | 7.66 s
    [Task 10/25]  Current/Best:   18.41/  20.57 GFLOPS | Progress: (16/20) | 9.94 s
    [Task 10/25]  Current/Best:   15.59/  20.57 GFLOPS | Progress: (20/20) | 11.71 s Done.
-
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:   12.86/  16.90 GFLOPS | Progress: (4/20) | 3.90 s
    [Task 11/25]  Current/Best:   10.00/  22.05 GFLOPS | Progress: (8/20) | 6.21 s
    [Task 11/25]  Current/Best:   20.18/  22.05 GFLOPS | Progress: (12/20) | 8.56 s
    [Task 11/25]  Current/Best:   23.20/  23.20 GFLOPS | Progress: (16/20) | 11.21 s
    [Task 11/25]  Current/Best:    9.29/  23.20 GFLOPS | Progress: (20/20) | 13.42 s Done.
-
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:    3.48/  12.17 GFLOPS | Progress: (4/20) | 6.21 s
    [Task 12/25]  Current/Best:    8.11/  16.26 GFLOPS | Progress: (8/20) | 8.65 s
    [Task 12/25]  Current/Best:   19.55/  19.55 GFLOPS | Progress: (12/20) | 11.03 s
    [Task 12/25]  Current/Best:   12.76/  19.55 GFLOPS | Progress: (16/20) | 15.24 s
    [Task 12/25]  Current/Best:   17.53/  19.55 GFLOPS | Progress: (20/20) | 18.83 s Done.
-
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:    8.04/  14.74 GFLOPS | Progress: (4/20) | 4.85 s
    [Task 13/25]  Current/Best:   14.34/  18.23 GFLOPS | Progress: (8/20) | 7.05 s
    [Task 13/25]  Current/Best:    6.09/  21.58 GFLOPS | Progress: (12/20) | 10.06 s
    [Task 13/25]  Current/Best:    8.29/  21.58 GFLOPS | Progress: (16/20) | 13.98 s
    [Task 13/25]  Current/Best:   12.58/  21.58 GFLOPS | Progress: (20/20) | 17.02 s Done.
-
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:    2.45/  10.90 GFLOPS | Progress: (4/20) | 5.24 s
    [Task 14/25]  Current/Best:    9.30/  17.68 GFLOPS | Progress: (8/20) | 9.91 s
    [Task 14/25]  Current/Best:   12.88/  17.68 GFLOPS | Progress: (12/20) | 13.41 s
    [Task 14/25]  Current/Best:   21.08/  21.08 GFLOPS | Progress: (16/20) | 18.08 s
    [Task 14/25]  Current/Best:   16.73/  21.08 GFLOPS | Progress: (20/20) | 19.71 s
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
+
    [Task  1/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  1/25]  Current/Best:   12.75/  13.41 GFLOPS | Progress: (4/20) | 8.82 s
    [Task  1/25]  Current/Best:   12.68/  18.42 GFLOPS | Progress: (8/20) | 14.95 s
    [Task  1/25]  Current/Best:   21.99/  21.99 GFLOPS | Progress: (12/20) | 18.95 s
    [Task  1/25]  Current/Best:   17.59/  21.99 GFLOPS | Progress: (16/20) | 21.31 s
    [Task  1/25]  Current/Best:   11.20/  21.99 GFLOPS | Progress: (20/20) | 24.08 s Done.
+
    [Task  2/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  2/25]  Current/Best:   14.62/  16.14 GFLOPS | Progress: (4/20) | 3.84 s
    [Task  2/25]  Current/Best:    3.91/  21.26 GFLOPS | Progress: (8/20) | 5.42 s
    [Task  2/25]  Current/Best:   11.60/  21.26 GFLOPS | Progress: (12/20) | 6.99 s
    [Task  2/25]  Current/Best:   19.82/  21.26 GFLOPS | Progress: (16/20) | 8.67 s
    [Task  2/25]  Current/Best:   11.01/  21.26 GFLOPS | Progress: (20/20) | 10.82 s Done.
+
    [Task  3/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  3/25]  Current/Best:   14.88/  17.03 GFLOPS | Progress: (4/20) | 4.27 s
    [Task  3/25]  Current/Best:   13.33/  20.19 GFLOPS | Progress: (8/20) | 6.98 s
    [Task  3/25]  Current/Best:   12.62/  20.19 GFLOPS | Progress: (12/20) | 9.76 s
    [Task  3/25]  Current/Best:    8.23/  20.19 GFLOPS | Progress: (16/20) | 11.99 s
    [Task  3/25]  Current/Best:    7.04/  20.19 GFLOPS | Progress: (20/20) | 14.35 s Done.
+
    [Task  4/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  4/25]  Current/Best:   13.78/  13.78 GFLOPS | Progress: (4/20) | 9.30 s
    [Task  4/25]  Current/Best:   17.55/  17.55 GFLOPS | Progress: (8/20) | 11.49 s
    [Task  4/25]  Current/Best:   16.39/  17.55 GFLOPS | Progress: (12/20) | 15.26 s
    [Task  4/25]  Current/Best:   17.36/  17.55 GFLOPS | Progress: (16/20) | 17.50 s
    [Task  4/25]  Current/Best:    4.19/  17.55 GFLOPS | Progress: (20/20) | 28.75 s
    [Task  5/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
+
    [Task  5/25]  Current/Best:   18.54/  18.54 GFLOPS | Progress: (4/20) | 4.16 s
    [Task  5/25]  Current/Best:   18.16/  18.54 GFLOPS | Progress: (8/20) | 6.54 s
    [Task  5/25]  Current/Best:   10.97/  18.54 GFLOPS | Progress: (12/20) | 9.94 s
    [Task  5/25]  Current/Best:    8.22/  18.54 GFLOPS | Progress: (16/20) | 12.43 s
    [Task  5/25]  Current/Best:    4.20/  18.54 GFLOPS | Progress: (20/20) | 14.52 s Done.
+
    [Task  6/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  6/25]  Current/Best:   13.35/  13.35 GFLOPS | Progress: (4/20) | 4.96 s
    [Task  6/25]  Current/Best:   10.25/  17.68 GFLOPS | Progress: (8/20) | 7.41 s
    [Task  6/25]  Current/Best:    3.21/  20.36 GFLOPS | Progress: (12/20) | 10.26 s
    [Task  6/25]  Current/Best:   17.47/  20.36 GFLOPS | Progress: (16/20) | 13.93 s
    [Task  6/25]  Current/Best:   13.10/  20.36 GFLOPS | Progress: (20/20) | 16.08 s Done.
+
    [Task  7/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  7/25]  Current/Best:   12.08/  17.76 GFLOPS | Progress: (4/20) | 5.15 s
    [Task  7/25]  Current/Best:    5.28/  17.76 GFLOPS | Progress: (8/20) | 9.26 s
    [Task  7/25]  Current/Best:   13.97/  19.47 GFLOPS | Progress: (12/20) | 11.41 s
    [Task  7/25]  Current/Best:    9.03/  19.47 GFLOPS | Progress: (16/20) | 14.59 s
    [Task  7/25]  Current/Best:   14.36/  19.47 GFLOPS | Progress: (20/20) | 18.75 s Done.
+
    [Task  8/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  8/25]  Current/Best:   12.39/  12.39 GFLOPS | Progress: (4/20) | 6.27 s
    [Task  8/25]  Current/Best:   10.23/  16.14 GFLOPS | Progress: (8/20) | 8.82 s
    [Task  8/25]  Current/Best:   10.51/  16.14 GFLOPS | Progress: (12/20) | 20.42 s
    [Task  8/25]  Current/Best:    2.87/  16.14 GFLOPS | Progress: (16/20) | 28.64 s
    [Task  8/25]  Current/Best:   12.71/  16.14 GFLOPS | Progress: (20/20) | 31.30 s
    [Task  9/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task  9/25]  Current/Best:    9.88/  22.73 GFLOPS | Progress: (4/20) | 7.10 s
    [Task  9/25]  Current/Best:    9.24/  22.73 GFLOPS | Progress: (8/20) | 11.48 s
    [Task  9/25]  Current/Best:   10.49/  22.73 GFLOPS | Progress: (12/20) | 22.58 s
    [Task  9/25]  Current/Best:   20.80/  22.73 GFLOPS | Progress: (16/20) | 33.62 s
   [Task  9/25]  Current/Best:   12.27/  22.73 GFLOPS | Progress: (20/20) | 37.95 s
    [Task 10/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
      Done.
-
    [Task 15/25]  Current/Best:   16.41/  16.41 GFLOPS | Progress: (4/20) | 4.54 s
    [Task 15/25]  Current/Best:    9.49/  16.41 GFLOPS | Progress: (8/20) | 8.61 s
    [Task 15/25]  Current/Best:    6.19/  20.03 GFLOPS | Progress: (12/20) | 10.43 s
    [Task 15/25]  Current/Best:   10.53/  22.37 GFLOPS | Progress: (16/20) | 15.23 s
    [Task 15/25]  Current/Best:    9.28/  22.37 GFLOPS | Progress: (20/20) | 19.52 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   15.78/  21.30 GFLOPS | Progress: (4/20) | 3.30 s
    [Task 16/25]  Current/Best:   12.07/  21.30 GFLOPS | Progress: (8/20) | 6.97 s
    [Task 16/25]  Current/Best:    7.58/  21.30 GFLOPS | Progress: (12/20) | 10.36 s
    [Task 16/25]  Current/Best:   20.52/  21.30 GFLOPS | Progress: (16/20) | 13.73 s
    [Task 16/25]  Current/Best:   18.51/  21.30 GFLOPS | Progress: (20/20) | 15.31 s Done.
-
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   13.34/  22.57 GFLOPS | Progress: (4/20) | 3.90 s
    [Task 17/25]  Current/Best:   11.87/  22.57 GFLOPS | Progress: (8/20) | 7.23 s
    [Task 17/25]  Current/Best:    6.17/  22.57 GFLOPS | Progress: (12/20) | 11.00 s
    [Task 17/25]  Current/Best:   17.40/  22.57 GFLOPS | Progress: (16/20) | 13.32 s
    [Task 17/25]  Current/Best:    7.70/  22.57 GFLOPS | Progress: (20/20) | 16.38 s Done.
-
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:   15.29/  15.95 GFLOPS | Progress: (4/20) | 4.23 s
    [Task 18/25]  Current/Best:   20.44/  20.44 GFLOPS | Progress: (8/20) | 6.45 s
    [Task 18/25]  Current/Best:   10.66/  20.44 GFLOPS | Progress: (12/20) | 10.36 s
    [Task 18/25]  Current/Best:    9.59/  20.44 GFLOPS | Progress: (16/20) | 13.94 s
    [Task 18/25]  Current/Best:   16.98/  20.44 GFLOPS | Progress: (20/20) | 15.84 s Done.
-
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:   17.15/  17.94 GFLOPS | Progress: (4/20) | 5.33 s
    [Task 19/25]  Current/Best:   16.78/  17.94 GFLOPS | Progress: (8/20) | 8.88 s
    [Task 19/25]  Current/Best:    6.29/  18.45 GFLOPS | Progress: (12/20) | 12.60 s
    [Task 19/25]  Current/Best:   13.10/  21.39 GFLOPS | Progress: (16/20) | 16.24 s
    [Task 19/25]  Current/Best:   11.29/  22.46 GFLOPS | Progress: (20/20) | 19.52 s Done.
-
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:   15.87/  18.90 GFLOPS | Progress: (4/20) | 4.24 s
    [Task 20/25]  Current/Best:   16.92/  18.90 GFLOPS | Progress: (8/20) | 6.38 s
    [Task 20/25]  Current/Best:   13.30/  18.90 GFLOPS | Progress: (12/20) | 9.43 s
    [Task 20/25]  Current/Best:   16.29/  18.90 GFLOPS | Progress: (16/20) | 12.89 s
    [Task 20/25]  Current/Best:   18.46/  18.90 GFLOPS | Progress: (20/20) | 15.30 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 21/25]  Current/Best:    7.33/   9.77 GFLOPS | Progress: (4/20) | 4.11 s
    [Task 21/25]  Current/Best:   12.73/  12.73 GFLOPS | Progress: (8/20) | 6.64 s
    [Task 21/25]  Current/Best:   17.92/  21.26 GFLOPS | Progress: (12/20) | 8.87 s
    [Task 21/25]  Current/Best:    9.99/  21.26 GFLOPS | Progress: (16/20) | 11.01 s Done.
-
    [Task 21/25]  Current/Best:   14.43/  21.26 GFLOPS | Progress: (20/20) | 13.27 s Done.
-
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:    5.21/  13.66 GFLOPS | Progress: (4/20) | 4.08 s
    [Task 22/25]  Current/Best:   17.97/  17.97 GFLOPS | Progress: (8/20) | 6.37 s
    [Task 22/25]  Current/Best:   18.50/  18.50 GFLOPS | Progress: (12/20) | 8.32 s
    [Task 22/25]  Current/Best:    9.68/  18.50 GFLOPS | Progress: (16/20) | 10.59 s
    [Task 22/25]  Current/Best:    4.45/  18.50 GFLOPS | Progress: (20/20) | 13.06 s Done.
-
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:    2.67/  18.67 GFLOPS | Progress: (4/20) | 4.69 s
    [Task 23/25]  Current/Best:   12.32/  18.67 GFLOPS | Progress: (8/20) | 7.92 s
    [Task 23/25]  Current/Best:   19.65/  19.65 GFLOPS | Progress: (12/20) | 13.04 s
    [Task 23/25]  Current/Best:    6.14/  22.04 GFLOPS | Progress: (16/20) | 15.80 s
    [Task 23/25]  Current/Best:    4.87/  22.04 GFLOPS | Progress: (20/20) | 19.23 s Done.
-
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    3.50/   3.50 GFLOPS | Progress: (4/20) | 12.15 s
    [Task 24/25]  Current/Best:    2.80/   9.97 GFLOPS | Progress: (8/20) | 23.90 s
    [Task 24/25]  Current/Best:    2.36/   9.97 GFLOPS | Progress: (12/20) | 26.81 s
    [Task 24/25]  Current/Best:    3.82/   9.97 GFLOPS | Progress: (16/20) | 31.71 s
    [Task 24/25]  Current/Best:    5.50/   9.97 GFLOPS | Progress: (20/20) | 42.66 s
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
-
    [Task 25/25]  Current/Best:    7.39/   7.69 GFLOPS | Progress: (4/20) | 3.88 s
    [Task 25/25]  Current/Best:    5.23/   7.69 GFLOPS | Progress: (8/20) | 14.82 s
    [Task 25/25]  Current/Best:    4.06/   7.69 GFLOPS | Progress: (12/20) | 25.75 s
    [Task 25/25]  Current/Best:    3.50/   7.69 GFLOPS | Progress: (16/20) | 27.87 s
    [Task 25/25]  Current/Best:    8.54/   8.54 GFLOPS | Progress: (20/20) | 38.79 s
+
    [Task 10/25]  Current/Best:   10.82/  16.14 GFLOPS | Progress: (4/20) | 4.26 s
    [Task 10/25]  Current/Best:    4.93/  16.39 GFLOPS | Progress: (8/20) | 6.30 s
    [Task 10/25]  Current/Best:    5.61/  16.39 GFLOPS | Progress: (12/20) | 9.19 s
    [Task 10/25]  Current/Best:   15.78/  18.17 GFLOPS | Progress: (16/20) | 11.77 s
    [Task 10/25]  Current/Best:   13.67/  18.17 GFLOPS | Progress: (20/20) | 13.61 s Done.
+
    [Task 11/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 11/25]  Current/Best:    3.11/  17.89 GFLOPS | Progress: (4/20) | 5.24 s
    [Task 11/25]  Current/Best:   13.11/  18.55 GFLOPS | Progress: (8/20) | 7.35 s
    [Task 11/25]  Current/Best:   17.75/  18.55 GFLOPS | Progress: (12/20) | 11.27 s
    [Task 11/25]  Current/Best:    6.18/  18.55 GFLOPS | Progress: (16/20) | 14.02 s
    [Task 11/25]  Current/Best:   23.04/  23.04 GFLOPS | Progress: (20/20) | 17.91 s Done.
+
    [Task 12/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 12/25]  Current/Best:   13.10/  19.82 GFLOPS | Progress: (4/20) | 5.49 s
    [Task 12/25]  Current/Best:    2.96/  19.82 GFLOPS | Progress: (8/20) | 8.93 s
    [Task 12/25]  Current/Best:   11.61/  20.20 GFLOPS | Progress: (12/20) | 12.05 s
    [Task 12/25]  Current/Best:   17.26/  20.20 GFLOPS | Progress: (16/20) | 15.83 s
    [Task 12/25]  Current/Best:   15.00/  20.20 GFLOPS | Progress: (20/20) | 22.21 s Done.
+
    [Task 13/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 13/25]  Current/Best:   12.58/  18.74 GFLOPS | Progress: (4/20) | 4.83 s
    [Task 13/25]  Current/Best:   16.95/  21.42 GFLOPS | Progress: (8/20) | 8.09 s
    [Task 13/25]  Current/Best:   21.02/  21.42 GFLOPS | Progress: (12/20) | 12.42 s
    [Task 13/25]  Current/Best:   16.78/  21.42 GFLOPS | Progress: (16/20) | 14.67 s
    [Task 13/25]  Current/Best:   11.86/  21.42 GFLOPS | Progress: (20/20) | 17.40 s Done.
+
    [Task 14/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 14/25]  Current/Best:   14.86/  14.86 GFLOPS | Progress: (4/20) | 4.04 s
    [Task 14/25]  Current/Best:    8.44/  14.86 GFLOPS | Progress: (8/20) | 8.61 s
    [Task 14/25]  Current/Best:   19.22/  19.22 GFLOPS | Progress: (12/20) | 11.21 s
    [Task 14/25]  Current/Best:    9.43/  19.22 GFLOPS | Progress: (16/20) | 15.15 s
    [Task 14/25]  Current/Best:    4.47/  19.22 GFLOPS | Progress: (20/20) | 20.70 s Done.
+
    [Task 15/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 15/25]  Current/Best:   14.06/  16.30 GFLOPS | Progress: (4/20) | 3.65 s
    [Task 15/25]  Current/Best:   13.99/  20.16 GFLOPS | Progress: (8/20) | 7.21 s
    [Task 15/25]  Current/Best:   13.00/  20.16 GFLOPS | Progress: (12/20) | 12.98 s
    [Task 15/25]  Current/Best:   11.53/  20.16 GFLOPS | Progress: (16/20) | 19.85 s
    [Task 15/25]  Current/Best:   15.49/  20.16 GFLOPS | Progress: (20/20) | 22.77 s
    [Task 16/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 16/25]  Current/Best:   17.88/  17.88 GFLOPS | Progress: (4/20) | 4.18 s
    [Task 16/25]  Current/Best:    6.22/  17.88 GFLOPS | Progress: (8/20) | 5.82 s
    [Task 16/25]  Current/Best:   14.30/  17.88 GFLOPS | Progress: (12/20) | 7.80 s
    [Task 16/25]  Current/Best:   16.02/  17.88 GFLOPS | Progress: (16/20) | 9.46 s
   [Task 16/25]  Current/Best:   13.39/  20.58 GFLOPS | Progress: (20/20) | 11.26 s Done.
+
    [Task 17/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 17/25]  Current/Best:   13.93/  13.93 GFLOPS | Progress: (4/20) | 5.15 s
    [Task 17/25]  Current/Best:   15.36/  18.71 GFLOPS | Progress: (8/20) | 8.68 s
    [Task 17/25]  Current/Best:   14.10/  18.71 GFLOPS | Progress: (12/20) | 12.51 s
    [Task 17/25]  Current/Best:    9.52/  18.71 GFLOPS | Progress: (16/20) | 15.68 s
    [Task 17/25]  Current/Best:    9.64/  22.97 GFLOPS | Progress: (20/20) | 19.13 s Done.
+
    [Task 18/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 18/25]  Current/Best:    9.61/  21.86 GFLOPS | Progress: (4/20) | 4.38 s
    [Task 18/25]  Current/Best:    8.41/  21.86 GFLOPS | Progress: (8/20) | 7.84 s
    [Task 18/25]  Current/Best:    6.84/  21.86 GFLOPS | Progress: (12/20) | 10.09 s
    [Task 18/25]  Current/Best:   10.14/  21.86 GFLOPS | Progress: (16/20) | 13.78 s
    [Task 18/25]  Current/Best:    9.08/  21.86 GFLOPS | Progress: (20/20) | 17.94 s Done.
+
    [Task 19/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 19/25]  Current/Best:    4.60/  11.15 GFLOPS | Progress: (4/20) | 8.37 s
    [Task 19/25]  Current/Best:   12.57/  18.02 GFLOPS | Progress: (8/20) | 10.77 s
    [Task 19/25]  Current/Best:   11.08/  18.02 GFLOPS | Progress: (12/20) | 16.86 s
    [Task 19/25]  Current/Best:   18.60/  19.64 GFLOPS | Progress: (16/20) | 19.67 s
    [Task 19/25]  Current/Best:   12.07/  19.64 GFLOPS | Progress: (20/20) | 22.29 s Done.
+
    [Task 20/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 20/25]  Current/Best:   12.78/  17.75 GFLOPS | Progress: (4/20) | 5.12 s
    [Task 20/25]  Current/Best:    8.20/  17.75 GFLOPS | Progress: (8/20) | 10.56 s
    [Task 20/25]  Current/Best:    5.56/  18.50 GFLOPS | Progress: (12/20) | 14.10 s
    [Task 20/25]  Current/Best:    3.68/  18.50 GFLOPS | Progress: (16/20) | 17.78 s
    [Task 20/25]  Current/Best:    8.79/  18.50 GFLOPS | Progress: (20/20) | 22.33 s
    [Task 21/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s Done.
+     Done.
+
    [Task 21/25]  Current/Best:   12.25/  14.44 GFLOPS | Progress: (4/20) | 4.43 s
    [Task 21/25]  Current/Best:    2.07/  20.69 GFLOPS | Progress: (8/20) | 6.42 s
    [Task 21/25]  Current/Best:   21.08/  21.08 GFLOPS | Progress: (12/20) | 9.31 s
    [Task 21/25]  Current/Best:   12.97/  21.08 GFLOPS | Progress: (16/20) | 12.50 s
    [Task 21/25]  Current/Best:   14.01/  21.08 GFLOPS | Progress: (20/20) | 14.90 s
    [Task 22/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 22/25]  Current/Best:   18.35/  20.35 GFLOPS | Progress: (4/20) | 3.47 s
    [Task 22/25]  Current/Best:    5.92/  20.35 GFLOPS | Progress: (8/20) | 6.80 s
    [Task 22/25]  Current/Best:    5.24/  20.35 GFLOPS | Progress: (12/20) | 10.13 s
    [Task 22/25]  Current/Best:   11.46/  20.35 GFLOPS | Progress: (16/20) | 12.50 s
    [Task 22/25]  Current/Best:   17.84/  20.35 GFLOPS | Progress: (20/20) | 14.21 s Done.
+
    [Task 23/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 23/25]  Current/Best:   10.59/  18.18 GFLOPS | Progress: (4/20) | 4.39 s
    [Task 23/25]  Current/Best:   19.46/  22.16 GFLOPS | Progress: (8/20) | 6.48 s
    [Task 23/25]  Current/Best:    5.03/  22.16 GFLOPS | Progress: (12/20) | 11.02 s
    [Task 23/25]  Current/Best:   21.28/  22.16 GFLOPS | Progress: (16/20) | 13.39 s
    [Task 23/25]  Current/Best:   21.30/  22.16 GFLOPS | Progress: (20/20) | 18.17 s Done.
+
    [Task 24/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 24/25]  Current/Best:    5.70/   7.02 GFLOPS | Progress: (4/20) | 12.75 s
    [Task 24/25]  Current/Best:    5.28/   8.79 GFLOPS | Progress: (8/20) | 24.41 s
    [Task 24/25]  Current/Best:    3.37/   8.79 GFLOPS | Progress: (12/20) | 35.37 s Done.
+
    [Task 24/25]  Current/Best:    6.51/   8.79 GFLOPS | Progress: (16/20) | 46.00 s
    [Task 24/25]  Current/Best:    3.13/   8.79 GFLOPS | Progress: (20/20) | 56.67 s
    [Task 25/25]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/20) | 0.00 s
    [Task 25/25]  Current/Best:    5.63/   5.63 GFLOPS | Progress: (4/20) | 13.96 s
    [Task 25/25]  Current/Best:    5.68/   9.28 GFLOPS | Progress: (8/20) | 16.10 s
    [Task 25/25]  Current/Best:    2.90/   9.28 GFLOPS | Progress: (12/20) | 21.09 s
    [Task 25/25]  Current/Best:    1.52/   9.28 GFLOPS | Progress: (16/20) | 32.06 s
    [Task 25/25]  Current/Best:    4.21/   9.28 GFLOPS | Progress: (20/20) | 43.05 s
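
The interleaved per-task progress bars above are emitted while looping over the extracted tasks with an XGBoost-based tuner; schematically (following the tutorial, with `tasks` and `tuning_option` assumed from the earlier extraction and configuration steps):

.. code-block:: python

    from tvm import autotvm
    from tvm.autotvm.tuner import XGBTuner

    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
        tuner_obj = XGBTuner(task, loss_type="rank")
        tuner_obj.tune(
            n_trial=min(tuning_option["trials"], len(task.config_space)),
            early_stopping=tuning_option["early_stopping"],
            measure_option=tuning_option["measure_option"],
            callbacks=[
                autotvm.callback.progress_bar(tuning_option["trials"], prefix=prefix),
                autotvm.callback.log_to_file(tuning_option["tuning_records"]),
            ],
        )
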
 
 
 
@@ -664,8 +664,8 @@ Verify that the optimized model runs and produces the same results:
 
  .. code-block:: none
 
-    class='n02123045 tabby, tabby cat' with probability=0.621103
-    class='n02123159 tiger cat' with probability=0.356379
+    class='n02123045 tabby, tabby cat' with probability=0.621104
+    class='n02123159 tiger cat' with probability=0.356378
     class='n02124075 Egyptian cat' with probability=0.019712
     class='n02129604 tiger, Panthera tigris' with probability=0.001215
     class='n04040759 radiator' with probability=0.000262
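
The class/probability lines above are produced by post-processing the module output with a softmax and a label lookup; approximately (with `module` the compiled graph module and `labels` the ImageNet synset list, both assumed from earlier steps):

.. code-block:: python

    import numpy as np
    from scipy.special import softmax

    # collect the output, normalize it, and rank the ImageNet classes
    tvm_output = module.get_output(0).numpy()
    scores = np.squeeze(softmax(tvm_output))
    ranks = np.argsort(scores)[::-1]
    for rank in ranks[0:5]:
        print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
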
@@ -722,8 +722,8 @@ improvement in comparing the optimized model to the unoptimized model.
 
  .. code-block:: none
 
-    optimized: {'mean': 411.6475026200351, 'median': 412.2672734001753, 'std': 3.0716165239000963}
-    unoptimized: {'mean': 514.1060797499813, 'median': 513.3606129998952, 'std': 2.953015519281599}
+    optimized: {'mean': 420.37608743999954, 'median': 420.17338465000194, 'std': 3.391873632834553}
+    unoptimized: {'mean': 514.1226119299996, 'median': 514.5901267999989, 'std': 2.544468258735559}
 
 
 
@@ -746,7 +746,7 @@ profiling/benchmarking.
 
 .. rst-class:: sphx-glr-timing
 
-   **Total running time of the script:** ( 11 minutes  38.791 seconds)
+   **Total running time of the script:** ( 12 minutes  55.388 seconds)
 
 
 .. _sphx_glr_download_tutorial_autotvm_relay_x86.py:
diff --git a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
index 054df7b66d..486aa1a999 100644
--- a/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
+++ b/docs/_sources/tutorial/cross_compilation_and_rpc.rst.txt
@@ -274,7 +274,7 @@ device and returns the measured cost. Network overhead is excluded.
 
  .. code-block:: none
 
-    1.288e-07 secs/op
+    1.268e-07 secs/op
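
That per-op cost is produced by a remote time evaluator after uploading the cross-compiled library; in outline (a sketch with `remote`, `func`, `a`, and `b` assumed from the preceding RPC setup; the path name is illustrative):

.. code-block:: python

    # upload the cross-compiled library and reload it on the device
    func.export_library("lib.tar")
    remote.upload("lib.tar")
    rfunc = remote.load_module("lib.tar")
    dev = remote.cpu(0)

    # the evaluator runs the kernel on-device, so network overhead is excluded
    time_f = rfunc.time_evaluator(rfunc.entry_name, dev, number=10)
    cost = time_f(a, b).mean
    print("%g secs/op" % cost)
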
 
 
 
diff --git a/docs/_sources/tutorial/intro_topi.rst.txt b/docs/_sources/tutorial/intro_topi.rst.txt
index 0e508a64f9..da12798a68 100644
--- a/docs/_sources/tutorial/intro_topi.rst.txt
+++ b/docs/_sources/tutorial/intro_topi.rst.txt
@@ -270,7 +270,7 @@ As you can see, scheduled stages of computation have been accumulated and we can
 
  .. code-block:: none
 
-    [stage(a, placeholder(a, 0x198303f0)), stage(b, placeholder(b, 0x20a42140)), stage(T_add, compute(T_add, body=[a[ax0, ax1, ax2] + b[ax1, ax2]], axis=[T.iter_var(ax0, T.Range(0, 100), "DataPar", ""), T.iter_var(ax1, T.Range(0, 10), "DataPar", ""), T.iter_var(ax2, T.Range(0, 10), "DataPar", "")], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[a[ax0, ax1, ax2] * b[ax1, ax2]], axis=[T.iter_var(ax0, T.Range(0, 100), "DataPar", ""), T.iter_var(ax1, T [...]
+    [stage(a, placeholder(a, 0x20609e00)), stage(b, placeholder(b, 0x2077c710)), stage(T_add, compute(T_add, body=[a[ax0, ax1, ax2] + b[ax1, ax2]], axis=[T.iter_var(ax0, T.Range(0, 100), "DataPar", ""), T.iter_var(ax1, T.Range(0, 10), "DataPar", ""), T.iter_var(ax2, T.Range(0, 10), "DataPar", "")], reduce_axis=[], tag=broadcast, attrs={})), stage(T_multiply, compute(T_multiply, body=[a[ax0, ax1, ax2] * b[ax1, ax2]], axis=[T.iter_var(ax0, T.Range(0, 100), "DataPar", ""), T.iter_var(ax1, T [...]
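
The stage list above is what the schedule accumulates once several TOPI operators are composed; a rough reconstruction (shapes inferred from the printed placeholders; the CUDA reduction schedule follows the tutorial):

.. code-block:: python

    import tvm
    from tvm import te, topi

    a = te.placeholder((100, 10, 10), name="a")
    b = te.placeholder((10, 10), name="b")
    c = topi.add(a, b)        # broadcast add, tagged "broadcast"
    d = topi.multiply(a, b)   # broadcast multiply
    e = topi.elemwise_sum([c, d])
    f = e / 2.0
    g = topi.sum(f)

    with tvm.target.Target("cuda"):
        sg = topi.cuda.schedule_reduce(g)
        print(sg.stages)  # the accumulated stages, as printed above
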
 
 
 
diff --git a/docs/_sources/tutorial/sg_execution_times.rst.txt b/docs/_sources/tutorial/sg_execution_times.rst.txt
index 2f2bf5a11e..9a5372cc60 100644
--- a/docs/_sources/tutorial/sg_execution_times.rst.txt
+++ b/docs/_sources/tutorial/sg_execution_times.rst.txt
@@ -5,29 +5,29 @@
 
 Computation times
 =================
-**15:06.764** total execution time for **tutorial** files:
+**16:09.390** total execution time for **tutorial** files:
 
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 11:38.791 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_relay_x86.py` (``autotvm_relay_x86.py``)                 | 12:55.388 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 01:29.212 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_auto_scheduler_matmul_x86.py` (``auto_scheduler_matmul_x86.py``) | 01:11.740 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 00:58.977 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_expr_get_started.py` (``tensor_expr_get_started.py``)     | 01:00.937 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:34.798 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_relay_quick_start.py` (``relay_quick_start.py``)                 | 00:35.401 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:23.385 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_autotvm_matmul_x86.py` (``autotvm_matmul_x86.py``)               | 00:23.477 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.819 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:01.465 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_tensor_ir_blitz_course.py` (``tensor_ir_blitz_course.py``)       | 00:00.619 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_intro_topi.py` (``intro_topi.py``)                               | 00:00.821 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.162 | 0.0 MB |
-+------------------------------------------------------------------------------------------+-----------+--------+
-| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.000 | 0.0 MB |
+| :ref:`sphx_glr_tutorial_cross_compilation_and_rpc.py` (``cross_compilation_and_rpc.py``) | 00:00.161 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_uma.py` (``uma.py``)                                             | 00:00.000 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
+| :ref:`sphx_glr_tutorial_introduction.py` (``introduction.py``)                           | 00:00.000 | 0.0 MB |
++------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_command_line_driver.py` (``tvmc_command_line_driver.py``)   | 00:00.000 | 0.0 MB |
 +------------------------------------------------------------------------------------------+-----------+--------+
 | :ref:`sphx_glr_tutorial_tvmc_python.py` (``tvmc_python.py``)                             | 00:00.000 | 0.0 MB |
diff --git a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
index 7964581112..739bd23e91 100644
--- a/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
+++ b/docs/_sources/tutorial/tensor_expr_get_started.rst.txt
@@ -498,10 +498,10 @@ We can now compare the different schedules
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                   numpy    7.2933599949465134e-06                   1.0
-                   naive              6.6998e-06      0.9186163859513606
-                parallel    6.995700000000001e-06     0.9591875356279194
-                  vector             2.47294e-05      3.3906731625937456
+                   numpy    6.601959998988605e-06                    1.0
+                   naive              6.6844e-06      1.0124872009257895
+                parallel              7.0532e-06       1.068349399432975
+                  vector             2.48202e-05      3.7595199007268083
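
Each row of the table is produced by timing one scheduled variant with the same `time_evaluator` harness; a compact sketch (per the tutorial, with `log` a list of `(name, seconds)` pairs whose first entry is the numpy baseline):

.. code-block:: python

    import numpy as np
    import tvm

    def evaluate_addition(func, target, optimization, log):
        # run the built vector-add and append its mean runtime to the log
        dev = tvm.device(target.kind.name, 0)
        n = 32768
        a = tvm.nd.array(np.random.uniform(size=n).astype(np.float32), dev)
        b = tvm.nd.array(np.random.uniform(size=n).astype(np.float32), dev)
        c = tvm.nd.array(np.zeros(n, dtype=np.float32), dev)
        evaluator = func.time_evaluator(func.entry_name, dev, number=10)
        log.append((optimization, evaluator(a, b, c).mean))

    # after evaluating the "naive", "parallel", and "vector" builds:
    baseline = log[0][1]
    for name, t in log:
        print("%20s %20s %20s" % (name, t, t / baseline))
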
 
 
 
@@ -922,7 +922,7 @@ matrix multiplication.
 
  .. code-block:: none
 
-    Numpy running time: 0.017826
+    Numpy running time: 0.018690
 
 
 
@@ -980,7 +980,7 @@ optimizations.
 
  .. code-block:: none
 
-    none: 3.263024
+    none: 3.410831
 
 
 
@@ -1080,7 +1080,7 @@ schedule.
 
  .. code-block:: none
 
-    blocking: 0.298400
+    blocking: 0.298940
 
 
 
@@ -1164,7 +1164,7 @@ already cache friendly from our previous optimizations.
 
  .. code-block:: none
 
-    vectorization: 0.335828
+    vectorization: 0.330297
     @I.ir_module
     class Module:
         @T.prim_func
@@ -1230,7 +1230,7 @@ more cache friendly.
 
  .. code-block:: none
 
-    loop permutation: 0.116744
+    loop permutation: 0.118682
     @I.ir_module
     class Module:
         @T.prim_func
@@ -1321,7 +1321,7 @@ optimized schedule.
 
  .. code-block:: none
 
-    array packing: 0.109167
+    array packing: 0.109763
     @I.ir_module
     class Module:
         @T.prim_func
@@ -1404,7 +1404,7 @@ to `C` when all the block results are ready.
 
  .. code-block:: none
 
-    block caching: 0.110256
+    block caching: 0.110120
     @I.ir_module
     class Module:
         @T.prim_func
@@ -1478,7 +1478,7 @@ of thread-level parallelization.
 
  .. code-block:: none
 
-    parallelization: 0.146220
+    parallelization: 0.146180
     @I.ir_module
     class Module:
         @T.prim_func
@@ -1548,13 +1548,13 @@ working, we can compare the results.
  .. code-block:: none
 
                 Operator                  Timing             Performance
-                    none            3.2630241653                     1.0
-                blocking            0.2984000276     0.09144891747149085
-           vectorization            0.3358284045     0.10291937401852622
-        loop permutation     0.11674430870000001     0.03577794793599594
-           array packing              0.10916693     0.03345575284452828
-           block caching            0.1102562794    0.033789599406740256
-         parallelization     0.14622028609999999     0.04481127895249792
+                    none      3.4108306232000003                     1.0
+                blocking            0.2989398201     0.08764428760157496
+           vectorization            0.3302969245     0.09683768002238685
+        loop permutation             0.118681628     0.03479552083083338
+           array packing            0.1097628463    0.032180679261353005
+           block caching            0.1101196099     0.03228527653967382
+         parallelization            0.1461797622     0.04285752602480621
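
The rows above correspond to successive rewrites of one GEMM schedule; condensed into a single sketch (assuming `s` and `C` from the tutorial's `te.create_schedule`; array packing and write-block caching are applied analogously via a packed `B` buffer and `s.cache_write`):

.. code-block:: python

    bn = 32

    # blocking: tile the loop nest into 32x32 cache-friendly blocks
    xo, yo, xi, yi = s[C].tile(C.op.axis[0], C.op.axis[1], bn, bn)
    (k,) = s[C].op.reduce_axis
    ko, ki = s[C].split(k, factor=4)

    # loop permutation: keep the innermost loop unit-stride
    s[C].reorder(xo, yo, ko, xi, ki, yi)

    # vectorization of the unit-stride inner loop
    s[C].vectorize(yi)

    # thread-level parallelization of the outermost loop
    s[C].parallel(xo)
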
 
 
 
@@ -1594,6 +1594,11 @@ operations with tunable parameters that allows you to automatically optimize
 the computation for specific platforms.
 
 
+.. rst-class:: sphx-glr-timing
+
+   **Total running time of the script:** ( 1 minutes  0.937 seconds)
+
+
 .. _sphx_glr_download_tutorial_tensor_expr_get_started.py:
 
 .. only:: html
diff --git a/docs/commit_hash b/docs/commit_hash
index 7256ec3a11..8e087f9590 100644
--- a/docs/commit_hash
+++ b/docs/commit_hash
@@ -1 +1 @@
-6c5be6fbd062a5cd431f09a1d87ac614cee73a39
+da99e9d1b5208e9a23e0b8e5b45da6e633f05415
diff --git a/docs/genindex.html b/docs/genindex.html
index 1e57b29243..479e37db47 100644
--- a/docs/genindex.html
+++ b/docs/genindex.html
@@ -485,8 +485,6 @@
 </li>
       <li><a href="reference/api/python/tir.html#tvm.tir.transform.Apply">Apply() (in module tvm.tir.transform)</a>
 </li>
-  </ul></td>
-  <td style="width: 33%; vertical-align: top;"><ul>
       <li><a href="reference/api/python/autotvm.html#tvm.autotvm.task.space.AnnotateEntity.apply">apply() (tvm.autotvm.task.space.AnnotateEntity method)</a>
 
       <ul>
@@ -495,6 +493,8 @@
         <li><a href="reference/api/python/autotvm.html#tvm.autotvm.task.space.SplitEntity.apply">(tvm.autotvm.task.space.SplitEntity method)</a>
 </li>
       </ul></li>
+  </ul></td>
+  <td style="width: 33%; vertical-align: top;"><ul>
       <li><a href="reference/api/python/auto_scheduler.html#tvm.auto_scheduler.SearchTask.apply_best">apply_best() (tvm.auto_scheduler.SearchTask method)</a>
 </li>
       <li><a href="reference/api/python/autotvm.html#tvm.autotvm.apply_history_best">apply_history_best() (in module tvm.autotvm)</a>
@@ -561,8 +561,18 @@
 </li>
       <li><a href="reference/api/python/tir.html#tvm.tir.assume">assume() (in module tvm.tir)</a>
 </li>
-      <li><a href="reference/api/python/ir.html#tvm.ir.Node.astext">astext() (tvm.ir.Node method)</a>
+      <li><a href="reference/api/python/relay/dataflow_pattern.html#tvm.relay.dataflow_pattern.astext">astext() (in module tvm.relay.dataflow_pattern)</a>
+
+      <ul>
+        <li><a href="reference/api/python/ir.html#tvm.ir.GlobalVar.astext">(tvm.ir.GlobalVar method)</a>
+</li>
+        <li><a href="reference/api/python/ir.html#tvm.ir.IRModule.astext">(tvm.ir.IRModule method)</a>
+</li>
+        <li><a href="reference/api/python/ir.html#tvm.ir.Op.astext">(tvm.ir.Op method)</a>
 </li>
+        <li><a href="reference/api/python/relay/dataflow_pattern.html#tvm.relay.dataflow_pattern.DFPattern.astext">(tvm.relay.dataflow_pattern.DFPattern method)</a>
+</li>
+      </ul></li>
       <li><a href="reference/api/python/te.html#tvm.te.atan">atan() (in module tvm.te)</a>
 
       <ul>
@@ -3210,7 +3220,7 @@
       </ul></li>
       <li><a href="reference/api/python/relay/testing.html#tvm.relay.testing.Prelude">Prelude (class in tvm.relay.testing)</a>
 </li>
-      <li><a href="reference/api/python/ir.html#tvm.ir.pretty_print">pretty_print() (in module tvm.ir)</a>
+      <li><a href="reference/api/python/relay/dataflow_pattern.html#tvm.relay.dataflow_pattern.pretty_print">pretty_print() (in module tvm.relay.dataflow_pattern)</a>
 </li>
       <li><a href="reference/api/python/tir.html#tvm.tir.transform.prim_func_pass">prim_func_pass() (in module tvm.tir.transform)</a>
 </li>
diff --git a/docs/how_to/compile_models/from_darknet.html b/docs/how_to/compile_models/from_darknet.html
index 452b801542..f109d49d5a 100644
--- a/docs/how_to/compile_models/from_darknet.html
+++ b/docs/how_to/compile_models/from_darknet.html
@@ -585,7 +585,7 @@ class:[&#39;truck 0.9266&#39;] left:471 top:83 right:689 bottom:169
 class:[&#39;bicycle 0.9984&#39;] left:111 top:113 right:577 bottom:447
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  14.880 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  16.940 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-darknet-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7716f96385bd5abb6e822041e285be54/from_darknet.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_darknet.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/from_keras.html b/docs/how_to/compile_models/from_keras.html
index 0be99cafc4..af50931a9b 100644
--- a/docs/how_to/compile_models/from_keras.html
+++ b/docs/how_to/compile_models/from_keras.html
@@ -506,7 +506,7 @@ Tensorflow is also required since it’s used as the default backend of keras.</
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Relay top-1 id: 285, class name: Egyptian cat
 
 1/1 [==============================] - ETA: 0s
-1/1 [==============================] - 1s 880ms/step
+1/1 [==============================] - 1s 946ms/step
 Keras top-1 id: 285, class name: Egyptian cat
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_mxnet.html b/docs/how_to/compile_models/from_mxnet.html
index 218efd1a20..47380e7737 100644
--- a/docs/how_to/compile_models/from_mxnet.html
+++ b/docs/how_to/compile_models/from_mxnet.html
@@ -439,7 +439,7 @@
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;x&quot;</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">x</span><span class="o">.</span><span class="n">shape</span></a><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zip362a85cc-5fe7-4587-9c52-f98a850c7209 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
+<img src="../../_images/sphx_glr_from_mxnet_001.png" srcset="../../_images/sphx_glr_from_mxnet_001.png" alt="from mxnet" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/resnet18_v1-a0666292.zipb42336e4-438b-476c-8eca-027629a47300 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/resnet18_v1-a0666292.zip...
 x (1, 3, 224, 224)
 </pre></div>
 </div>
diff --git a/docs/how_to/compile_models/from_oneflow.html b/docs/how_to/compile_models/from_oneflow.html
index 62b4bfc7c1..e9414ed258 100644
--- a/docs/how_to/compile_models/from_oneflow.html
+++ b/docs/how_to/compile_models/from_oneflow.html
@@ -449,13 +449,14 @@ Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdo
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading: &quot;https://oneflow-public.oss-cn-beijing.aliyuncs.com/model_zoo/flowvision/classification/ResNet/resnet18.zip&quot; to /workspace/.oneflow/flowvision_cache/resnet18.zip
 
   0%|          | 0.00/41.5M [00:00&lt;?, ?B/s]
- 19%|#9        | 7.99M/41.5M [00:00&lt;00:00, 44.4MB/s]
- 35%|###4      | 14.3M/41.5M [00:00&lt;00:00, 53.6MB/s]
- 54%|#####3    | 22.3M/41.5M [00:00&lt;00:00, 54.6MB/s]
- 67%|######6   | 27.7M/41.5M [00:00&lt;00:00, 52.9MB/s]
- 82%|########2 | 34.1M/41.5M [00:00&lt;00:00, 55.9MB/s]
- 96%|#########6| 40.0M/41.5M [00:00&lt;00:00, 55.6MB/s]
-100%|##########| 41.5M/41.5M [00:00&lt;00:00, 54.8MB/s]
+ 15%|#5        | 6.33M/41.5M [00:00&lt;00:00, 47.7MB/s]
+ 26%|##6       | 10.9M/41.5M [00:00&lt;00:00, 47.6MB/s]
+ 39%|###8      | 16.0M/41.5M [00:00&lt;00:00, 49.0MB/s]
+ 54%|#####3    | 22.3M/41.5M [00:00&lt;00:00, 41.5MB/s]
+ 64%|######3   | 26.5M/41.5M [00:00&lt;00:00, 37.5MB/s]
+ 77%|#######7  | 32.0M/41.5M [00:00&lt;00:00, 41.5MB/s]
+ 92%|#########2| 38.3M/41.5M [00:00&lt;00:00, 38.8MB/s]
+100%|##########| 41.5M/41.5M [00:01&lt;00:00, 42.1MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_pytorch.html b/docs/how_to/compile_models/from_pytorch.html
index 0990e7c658..cab55b695b 100644
--- a/docs/how_to/compile_models/from_pytorch.html
+++ b/docs/how_to/compile_models/from_pytorch.html
@@ -432,10 +432,11 @@ be unstable.</p>
 Downloading: &quot;https://download.pytorch.org/models/resnet18-f37072fd.pth&quot; to /workspace/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
 
   0%|          | 0.00/44.7M [00:00&lt;?, ?B/s]
- 30%|###       | 13.6M/44.7M [00:00&lt;00:00, 142MB/s]
- 65%|######4   | 28.9M/44.7M [00:00&lt;00:00, 153MB/s]
- 97%|#########7| 43.4M/44.7M [00:00&lt;00:00, 118MB/s]
-100%|##########| 44.7M/44.7M [00:00&lt;00:00, 119MB/s]
+ 14%|#4        | 6.30M/44.7M [00:00&lt;00:00, 62.9MB/s]
+ 28%|##7       | 12.3M/44.7M [00:00&lt;00:00, 56.2MB/s]
+ 56%|#####5    | 24.9M/44.7M [00:00&lt;00:00, 88.4MB/s]
+ 88%|########7 | 39.3M/44.7M [00:00&lt;00:00, 112MB/s]
+100%|##########| 44.7M/44.7M [00:00&lt;00:00, 96.2MB/s]
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/compile_models/from_tensorflow.html b/docs/how_to/compile_models/from_tensorflow.html
index 8dbe717fd1..d229f2acc1 100644
--- a/docs/how_to/compile_models/from_tensorflow.html
+++ b/docs/how_to/compile_models/from_tensorflow.html
@@ -649,7 +649,7 @@ banana (score = 0.00022)
 desk (score = 0.00019)
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  17.877 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  21.156 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-compile-models-from-tensorflow-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7f1d3d1b878694c201c614c807cdebc8/from_tensorflow.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">from_tensorflow.py</span></code></a></p>
diff --git a/docs/how_to/compile_models/sg_execution_times.html b/docs/how_to/compile_models/sg_execution_times.html
index 331e9d2d9a..9e9ccdefde 100644
--- a/docs/how_to/compile_models/sg_execution_times.html
+++ b/docs/how_to/compile_models/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-compile-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>06:07.783</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
+<p><strong>06:21.358</strong> total execution time for <strong>how_to_compile_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 81%" />
@@ -349,43 +349,43 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_tensorflow.html#sphx-glr-how-to-compile-models-from-tensorflow-py"><span class="std std-ref">Compile Tensorflow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tensorflow.py</span></code>)</p></td>
-<td><p>01:17.877</p></td>
+<td><p>01:21.156</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_darknet.html#sphx-glr-how-to-compile-models-from-darknet-py"><span class="std std-ref">Compile YOLO-V2 and YOLO-V3 in DarkNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_darknet.py</span></code>)</p></td>
-<td><p>01:14.880</p></td>
+<td><p>01:16.940</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_paddle.html#sphx-glr-how-to-compile-models-from-paddle-py"><span class="std std-ref">Compile PaddlePaddle Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_paddle.py</span></code>)</p></td>
-<td><p>00:50.409</p></td>
+<td><p>00:52.745</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_oneflow.html#sphx-glr-how-to-compile-models-from-oneflow-py"><span class="std std-ref">Compile OneFlow Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_oneflow.py</span></code>)</p></td>
-<td><p>00:33.733</p></td>
+<td><p>00:35.607</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_mxnet.html#sphx-glr-how-to-compile-models-from-mxnet-py"><span class="std std-ref">Compile MXNet Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_mxnet.py</span></code>)</p></td>
-<td><p>00:29.568</p></td>
+<td><p>00:30.318</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_coreml.html#sphx-glr-how-to-compile-models-from-coreml-py"><span class="std std-ref">Compile CoreML Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_coreml.py</span></code>)</p></td>
-<td><p>00:29.300</p></td>
+<td><p>00:30.047</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_tflite.html#sphx-glr-how-to-compile-models-from-tflite-py"><span class="std std-ref">Compile TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_tflite.py</span></code>)</p></td>
-<td><p>00:26.765</p></td>
+<td><p>00:27.172</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_pytorch.html#sphx-glr-how-to-compile-models-from-pytorch-py"><span class="std std-ref">Compile PyTorch Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_pytorch.py</span></code>)</p></td>
-<td><p>00:23.543</p></td>
+<td><p>00:24.515</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="from_keras.html#sphx-glr-how-to-compile-models-from-keras-py"><span class="std std-ref">Compile Keras Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_keras.py</span></code>)</p></td>
-<td><p>00:19.171</p></td>
+<td><p>00:20.262</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="from_onnx.html#sphx-glr-how-to-compile-models-from-onnx-py"><span class="std std-ref">Compile ONNX Models</span></a> (<code class="docutils literal notranslate"><span class="pre">from_onnx.py</span></code>)</p></td>
-<td><p>00:02.536</p></td>
+<td><p>00:02.597</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/deploy_models/deploy_model_on_adreno.html b/docs/how_to/deploy_models/deploy_model_on_adreno.html
index 0ef815317a..550c5f188c 100644
--- a/docs/how_to/deploy_models/deploy_model_on_adreno.html
+++ b/docs/how_to/deploy_models/deploy_model_on_adreno.html
@@ -920,7 +920,7 @@ Top5 predictions:
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
- 2685.0209    2684.0255    2688.4638    2683.0311      1.9015
+ 2544.0319    2543.2676    2549.7816    2542.3935      2.0287
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-model-on-adreno-py">
diff --git a/docs/how_to/deploy_models/deploy_model_on_android.html b/docs/how_to/deploy_models/deploy_model_on_android.html
index 7ca52b0c6f..311e32b529 100644
--- a/docs/how_to/deploy_models/deploy_model_on_android.html
+++ b/docs/how_to/deploy_models/deploy_model_on_android.html
@@ -662,7 +662,7 @@ to the remote android device.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  15.5922      15.5360      15.8489      15.4871       0.1193
+  16.3474      16.5616      16.7901      15.5248       0.4458
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
index 427d4a6e4c..22aa76c414 100644
--- a/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
+++ b/docs/how_to/deploy_models/deploy_object_detection_pytorch.html
@@ -454,25 +454,29 @@ be unstable.</p>
 Downloading: &quot;https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth&quot; to /workspace/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
 
   0%|          | 0.00/170M [00:00&lt;?, ?B/s]
-  5%|4         | 7.99M/170M [00:00&lt;00:02, 64.5MB/s]
-  9%|9         | 16.0M/170M [00:00&lt;00:02, 69.0MB/s]
- 14%|#4        | 24.0M/170M [00:00&lt;00:02, 71.5MB/s]
- 19%|#8        | 32.0M/170M [00:00&lt;00:01, 75.2MB/s]
- 24%|##3       | 40.0M/170M [00:00&lt;00:01, 71.0MB/s]
- 28%|##8       | 48.0M/170M [00:00&lt;00:01, 74.5MB/s]
- 35%|###4      | 59.3M/170M [00:00&lt;00:01, 87.9MB/s]
- 41%|####1     | 70.0M/170M [00:00&lt;00:01, 95.2MB/s]
- 47%|####7     | 80.0M/170M [00:01&lt;00:01, 90.9MB/s]
- 52%|#####2    | 88.8M/170M [00:01&lt;00:01, 62.6MB/s]
- 57%|#####6    | 96.0M/170M [00:01&lt;00:01, 64.9MB/s]
- 62%|######1   | 104M/170M [00:01&lt;00:00, 70.6MB/s]
- 69%|######8   | 116M/170M [00:01&lt;00:00, 84.3MB/s]
- 74%|#######3  | 125M/170M [00:01&lt;00:00, 59.1MB/s]
- 78%|#######7  | 132M/170M [00:02&lt;00:00, 57.3MB/s]
- 85%|########4 | 144M/170M [00:02&lt;00:00, 64.4MB/s]
- 89%|########9 | 152M/170M [00:02&lt;00:00, 60.5MB/s]
- 96%|#########5| 163M/170M [00:02&lt;00:00, 71.2MB/s]
-100%|##########| 170M/170M [00:02&lt;00:00, 71.5MB/s]
+  5%|4         | 8.00M/170M [00:00&lt;00:02, 63.5MB/s]
+ 11%|#         | 18.2M/170M [00:00&lt;00:01, 86.2MB/s]
+ 16%|#5        | 26.7M/170M [00:00&lt;00:02, 52.7MB/s]
+ 19%|#9        | 32.9M/170M [00:00&lt;00:02, 48.7MB/s]
+ 24%|##3       | 40.4M/170M [00:00&lt;00:02, 56.3MB/s]
+ 28%|##8       | 48.4M/170M [00:00&lt;00:02, 63.4MB/s]
+ 33%|###2      | 56.0M/170M [00:00&lt;00:02, 59.1MB/s]
+ 38%|###7      | 64.0M/170M [00:01&lt;00:02, 53.9MB/s]
+ 42%|####2     | 72.0M/170M [00:01&lt;00:01, 59.1MB/s]
+ 47%|####7     | 80.0M/170M [00:01&lt;00:01, 60.7MB/s]
+ 52%|#####1    | 88.0M/170M [00:01&lt;00:01, 56.2MB/s]
+ 57%|#####6    | 96.0M/170M [00:01&lt;00:01, 60.4MB/s]
+ 61%|######1   | 104M/170M [00:01&lt;00:01, 60.7MB/s]
+ 66%|######6   | 112M/170M [00:01&lt;00:00, 66.8MB/s]
+ 71%|#######   | 120M/170M [00:02&lt;00:00, 70.2MB/s]
+ 75%|#######4  | 127M/170M [00:02&lt;00:00, 61.5MB/s]
+ 78%|#######8  | 133M/170M [00:02&lt;00:00, 45.1MB/s]
+ 84%|########3 | 142M/170M [00:02&lt;00:00, 55.0MB/s]
+ 87%|########7 | 148M/170M [00:02&lt;00:00, 52.1MB/s]
+ 91%|######### | 154M/170M [00:02&lt;00:00, 49.0MB/s]
+ 94%|#########4| 160M/170M [00:02&lt;00:00, 50.0MB/s]
+ 98%|#########7| 166M/170M [00:03&lt;00:00, 51.3MB/s]
+100%|##########| 170M/170M [00:03&lt;00:00, 56.6MB/s]
 /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torch/nn/functional.py:3897: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
   for i in range(dim)
 /venv/apache-tvm-py3.7/lib/python3.7/site-packages/torchvision/models/detection/anchor_utils.py:124: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the &#39;trunc&#39; function NOT &#39;floor&#39;). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode=&#39;trunc&#39;), or for actual floor division, use torch.div(a, b, rounding_mode=& [...]
@@ -570,7 +574,7 @@ torchvision rcnn models.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Get 9 valid boxes
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  22.255 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  27.860 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-object-detection-pytorch-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7795da4b258c8feff986668b95ef57ad/deploy_object_detection_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_object_detection_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized.html b/docs/how_to/deploy_models/deploy_prequantized.html
index aed0837d28..c3ee7bf2a3 100644
--- a/docs/how_to/deploy_models/deploy_prequantized.html
+++ b/docs/how_to/deploy_models/deploy_prequantized.html
@@ -495,8 +495,8 @@ training. Other models require a full post training calibration.</p>
 Downloading: &quot;https://download.pytorch.org/models/mobilenet_v2-b0353104.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
 
   0%|          | 0.00/13.6M [00:00&lt;?, ?B/s]
- 59%|#####8    | 7.99M/13.6M [00:00&lt;00:00, 50.9MB/s]
-100%|##########| 13.6M/13.6M [00:00&lt;00:00, 53.9MB/s]
+ 59%|#####8    | 7.99M/13.6M [00:00&lt;00:00, 60.1MB/s]
+100%|##########| 13.6M/13.6M [00:00&lt;00:00, 62.2MB/s]
 </pre></div>
 </div>
 </div>
@@ -587,7 +587,7 @@ output values are identical out of 1000 outputs from mobilenet v2.</p>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  90.1450      90.0606      94.1685      89.8995       0.4423
+  90.5382      90.5035      92.3326      90.2597       0.2281
 </pre></div>
 </div>
 <div class="admonition note">
@@ -626,7 +626,7 @@ This includes support for the VNNI 8 bit dot product instruction (CascadeLake or
 <div class="section" id="deploy-a-quantized-tflite-model">
 <h2>Deploy a quantized TFLite Model<a class="headerlink" href="#deploy-a-quantized-tflite-model" title="Permalink to this headline">¶</a></h2>
 <p>TODO</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  11.751 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  13.060 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/fb8217c13f4351224c6cf3aacf1a87fc/deploy_prequantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_prequantized_tflite.html b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
index 8c962ba314..3ef89b6320 100644
--- a/docs/how_to/deploy_models/deploy_prequantized_tflite.html
+++ b/docs/how_to/deploy_models/deploy_prequantized_tflite.html
@@ -580,7 +580,7 @@ TFLite Top-5 labels: [387 102 386 341 349]
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  118.1641     118.2210     119.2519     117.1100      0.4611
+  121.2507     121.2005     125.3829     120.1212      0.5733
 </pre></div>
 </div>
 <div class="admonition note">
@@ -608,7 +608,7 @@ network for ARM CPU</span></a>.</p></li>
 </ul>
 </div></blockquote>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  36.709 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 2 minutes  33.738 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-prequantized-tflite-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/56691c7a27d45da61d112276334640d3/deploy_prequantized_tflite.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_prequantized_tflite.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_quantized.html b/docs/how_to/deploy_models/deploy_quantized.html
index 1a75fa73ff..341fe0e8b2 100644
--- a/docs/how_to/deploy_models/deploy_quantized.html
+++ b/docs/how_to/deploy_models/deploy_quantized.html
@@ -521,7 +521,7 @@ for calibration. But the accuracy might be impacted.</p>
   DeprecationWarning,
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  42.416 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  35.538 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-quantized-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/7810ecf51bfc05f7d5e8a400ac3e815d/deploy_quantized.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_quantized.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
index 443a8e4567..9aa0fe4d4f 100644
--- a/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
+++ b/docs/how_to/deploy_models/deploy_ssd_gluoncv.html
@@ -463,24 +463,23 @@ to your device.</p>
 Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/ssd_512_resnet50_v1_voc-9c8b225a.zip...
 
   0%|          | 0/132723 [00:00&lt;?, ?KB/s]
-  4%|4         | 5640/132723 [00:00&lt;00:02, 56397.90KB/s]
- 10%|9         | 12997/132723 [00:00&lt;00:01, 66494.82KB/s]
- 16%|#5        | 20598/132723 [00:00&lt;00:01, 70834.49KB/s]
- 21%|##1       | 28475/132723 [00:00&lt;00:01, 73963.67KB/s]
- 27%|##7       | 36366/132723 [00:00&lt;00:01, 75715.09KB/s]
- 33%|###3      | 44179/132723 [00:00&lt;00:01, 76534.74KB/s]
- 39%|###9      | 51985/132723 [00:00&lt;00:01, 77030.29KB/s]
- 45%|####5     | 59828/132723 [00:00&lt;00:00, 77473.83KB/s]
- 51%|#####     | 67646/132723 [00:00&lt;00:00, 77692.56KB/s]
- 57%|#####6    | 75558/132723 [00:01&lt;00:00, 78131.71KB/s]
- 63%|######2   | 83372/132723 [00:01&lt;00:00, 77757.85KB/s]
- 69%|######8   | 91186/132723 [00:01&lt;00:00, 77866.92KB/s]
- 75%|#######4  | 99068/132723 [00:01&lt;00:00, 78153.94KB/s]
- 81%|########  | 106946/132723 [00:01&lt;00:00, 78337.39KB/s]
- 86%|########6 | 114780/132723 [00:01&lt;00:00, 78271.51KB/s]
- 92%|#########2| 122672/132723 [00:01&lt;00:00, 78465.54KB/s]
- 98%|#########8| 130574/132723 [00:01&lt;00:00, 78630.60KB/s]
-100%|##########| 132723/132723 [00:01&lt;00:00, 76781.09KB/s]
+  4%|3         | 5205/132723 [00:00&lt;00:02, 52016.81KB/s]
+ 10%|9         | 13190/132723 [00:00&lt;00:01, 68381.47KB/s]
+ 16%|#5        | 21199/132723 [00:00&lt;00:01, 73721.89KB/s]
+ 22%|##2       | 29223/132723 [00:00&lt;00:01, 76292.89KB/s]
+ 28%|##8       | 37243/132723 [00:00&lt;00:01, 77698.35KB/s]
+ 34%|###4      | 45287/132723 [00:00&lt;00:01, 78624.08KB/s]
+ 40%|####      | 53337/132723 [00:00&lt;00:01, 79232.31KB/s]
+ 46%|####6     | 61384/132723 [00:00&lt;00:00, 79623.46KB/s]
+ 52%|#####2    | 69385/132723 [00:00&lt;00:00, 79740.27KB/s]
+ 58%|#####8    | 77484/132723 [00:01&lt;00:00, 80123.65KB/s]
+ 64%|######4   | 85573/132723 [00:01&lt;00:00, 80354.62KB/s]
+ 71%|#######   | 93657/132723 [00:01&lt;00:00, 80485.65KB/s]
+ 77%|#######6  | 101742/132723 [00:01&lt;00:00, 80593.11KB/s]
+ 83%|########2 | 109802/132723 [00:01&lt;00:00, 80553.98KB/s]
+ 89%|########8 | 117858/132723 [00:01&lt;00:00, 80473.85KB/s]
+ 95%|#########4| 125906/132723 [00:01&lt;00:00, 80194.88KB/s]
+100%|##########| 132723/132723 [00:01&lt;00:00, 78667.62KB/s]
 </pre></div>
 </div>
 <p>Create TVM runtime and do inference
@@ -519,7 +518,7 @@ Downloading /workspace/.mxnet/models/ssd_512_resnet50_v1_voc-9c8b225a.zip from h
 <span class="n">plt</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  25.637 seconds)</p>
+<img src="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" srcset="../../_images/sphx_glr_deploy_ssd_gluoncv_001.png" alt="deploy ssd gluoncv" class = "sphx-glr-single-img"/><p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 3 minutes  29.501 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-deploy-models-deploy-ssd-gluoncv-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/cccb17d28e5e8b2e94ea8cd5ec59f6ed/deploy_ssd_gluoncv.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">deploy_ssd_gluoncv.py</span></code></a></p>
diff --git a/docs/how_to/deploy_models/sg_execution_times.html b/docs/how_to/deploy_models/sg_execution_times.html
index f89f95c1cc..a9da3bcbbc 100644
--- a/docs/how_to/deploy_models/sg_execution_times.html
+++ b/docs/how_to/deploy_models/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-deploy-models-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>14:45.499</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
+<p><strong>14:47.952</strong> total execution time for <strong>how_to_deploy_models</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 86%" />
@@ -349,39 +349,39 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_ssd_gluoncv.html#sphx-glr-how-to-deploy-models-deploy-ssd-gluoncv-py"><span class="std std-ref">Deploy Single Shot Multibox Detector(SSD) model</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_ssd_gluoncv.py</span></code>)</p></td>
-<td><p>03:25.637</p></td>
+<td><p>03:29.501</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_object_detection_pytorch.html#sphx-glr-how-to-deploy-models-deploy-object-detection-pytorch-py"><span class="std std-ref">Compile PyTorch Object Detection Models</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_object_detection_pytorch.py</span></code>)</p></td>
-<td><p>03:22.255</p></td>
+<td><p>03:27.860</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized_tflite.html#sphx-glr-how-to-deploy-models-deploy-prequantized-tflite-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM - Part 3 (TFLite)</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized_tflite.py</span></code>)</p></td>
-<td><p>02:36.709</p></td>
+<td><p>02:33.738</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_quantized.html#sphx-glr-how-to-deploy-models-deploy-quantized-py"><span class="std std-ref">Deploy a Quantized Model on Cuda</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_quantized.py</span></code>)</p></td>
-<td><p>01:42.416</p></td>
+<td><p>01:35.538</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_prequantized.html#sphx-glr-how-to-deploy-models-deploy-prequantized-py"><span class="std std-ref">Deploy a Framework-prequantized Model with TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_prequantized.py</span></code>)</p></td>
-<td><p>01:11.751</p></td>
+<td><p>01:13.060</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_adreno.html#sphx-glr-how-to-deploy-models-deploy-model-on-adreno-py"><span class="std std-ref">Deploy the Pretrained Model on Adreno</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_adreno.py</span></code>)</p></td>
-<td><p>00:54.388</p></td>
+<td><p>00:53.267</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_android.html#sphx-glr-how-to-deploy-models-deploy-model-on-android-py"><span class="std std-ref">Deploy the Pretrained Model on Android</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_android.py</span></code>)</p></td>
-<td><p>00:39.185</p></td>
+<td><p>00:40.132</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_model_on_nano.html#sphx-glr-how-to-deploy-models-deploy-model-on-nano-py"><span class="std std-ref">Deploy the Pretrained Model on Jetson Nano</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_nano.py</span></code>)</p></td>
-<td><p>00:26.705</p></td>
+<td><p>00:27.693</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="deploy_model_on_rasp.html#sphx-glr-how-to-deploy-models-deploy-model-on-rasp-py"><span class="std std-ref">Deploy the Pretrained Model on Raspberry Pi</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_model_on_rasp.py</span></code>)</p></td>
-<td><p>00:26.447</p></td>
+<td><p>00:27.158</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="deploy_sparse.html#sphx-glr-how-to-deploy-models-deploy-sparse-py"><span class="std std-ref">Deploy a Hugging Face Pruned Model on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">deploy_sparse.py</span></code>)</p></td>
diff --git a/docs/how_to/extend_tvm/bring_your_own_datatypes.html b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
index d6bccee16d..8884bffe0a 100644
--- a/docs/how_to/extend_tvm/bring_your_own_datatypes.html
+++ b/docs/how_to/extend_tvm/bring_your_own_datatypes.html
@@ -619,7 +619,7 @@ In this alpha state of the Bring Your Own Datatypes framework, we have not imple
 <span class="n">module</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">params</span></a> <span class="o">=</span> <span class="n">get_mobilenet</span><span class="p">()</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zipb9b11189-fcad-489c-b4dc-d59850bff597 from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Downloading /workspace/.mxnet/models/mobilenet0.25-9f83e440.zip07ed05b1-4d90-462b-a58b-b6a364e3366a from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/mobilenet0.25-9f83e440.zip...
 </pre></div>
 </div>
 <p>It’s easy to execute MobileNet with native TVM:</p>
diff --git a/docs/how_to/extend_tvm/sg_execution_times.html b/docs/how_to/extend_tvm/sg_execution_times.html
index 42e61743d1..3b1672af67 100644
--- a/docs/how_to/extend_tvm/sg_execution_times.html
+++ b/docs/how_to/extend_tvm/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-extend-tvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:51.322</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
+<p><strong>00:53.032</strong> total execution time for <strong>how_to_extend_tvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -349,19 +349,19 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="bring_your_own_datatypes.html#sphx-glr-how-to-extend-tvm-bring-your-own-datatypes-py"><span class="std std-ref">Bring Your Own Datatypes to TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">bring_your_own_datatypes.py</span></code>)</p></td>
-<td><p>00:47.694</p></td>
+<td><p>00:49.186</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="use_pass_instrument.html#sphx-glr-how-to-extend-tvm-use-pass-instrument-py"><span class="std std-ref">How to Use TVM Pass Instrument</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_instrument.py</span></code>)</p></td>
-<td><p>00:02.577</p></td>
+<td><p>00:02.741</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="use_pass_infra.html#sphx-glr-how-to-extend-tvm-use-pass-infra-py"><span class="std std-ref">How to Use TVM Pass Infra</span></a> (<code class="docutils literal notranslate"><span class="pre">use_pass_infra.py</span></code>)</p></td>
-<td><p>00:01.044</p></td>
+<td><p>00:01.097</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="low_level_custom_pass.html#sphx-glr-how-to-extend-tvm-low-level-custom-pass-py"><span class="std std-ref">Writing a Customized Pass</span></a> (<code class="docutils literal notranslate"><span class="pre">low_level_custom_pass.py</span></code>)</p></td>
-<td><p>00:00.007</p></td>
+<td><p>00:00.008</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/extend_tvm/use_pass_instrument.html b/docs/how_to/extend_tvm/use_pass_instrument.html
index 4fd56a7758..d1b0a2eff5 100644
--- a/docs/how_to/extend_tvm/use_pass_instrument.html
+++ b/docs/how_to/extend_tvm/use_pass_instrument.html
@@ -409,7 +409,7 @@ passes. Please also refer to the <a class="reference internal" href="../../arch/
 <a href="https://docs.python.org/3/library/stdtypes.html#tuple" title="builtins.tuple" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">output_shape</span></a> <span class="o">=</span> <span class="p">(</span><a href="https://docs.python.org/3/library/functions.html#int" title="builtins.int" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">batch_size</span></a [...]
 <span class="n">relay_mod</span><span class="p">,</span> <a href="https://docs.python.org/3/library/stdtypes.html#dict" title="builtins.dict" class="sphx-glr-backref-module-builtins sphx-glr-backref-type-py-class sphx-glr-backref-instance"><span class="n">relay_params</span></a> <span class="o">=</span> <a href="../../reference/api/python/relay/testing.html#tvm.relay.testing.resnet.get_workload" title="tvm.relay.testing.resnet.get_workload" class="sphx-glr-backref-module-tvm-relay-testin [...]
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Printing the IR module...&quot;</span><span class="p">)</span>
-<span class="nb">print</span><span class="p">(</span><a href="../../reference/api/python/ir.html#tvm.ir.Node.astext" title="tvm.ir.Node.astext" class="sphx-glr-backref-module-tvm-ir sphx-glr-backref-type-py-method"><span class="n">relay_mod</span><span class="o">.</span><span class="n">astext</span></a><span class="p">(</span><span class="n">show_meta_data</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
+<span class="nb">print</span><span class="p">(</span><span class="n">relay_mod</span><span class="o">.</span><span class="n">astext</span><span class="p">(</span><span class="n">show_meta_data</span><span class="o">=</span><span class="kc">False</span><span class="p">))</span>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing the IR module...
@@ -526,10 +526,10 @@ profile the execution time of each passes.</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 17965us [17965us] (48.54%; 48.54%)
-FoldScaleAxis: 19042us [6us] (51.46%; 51.46%)
-        FoldConstant: 19035us [1705us] (51.44%; 99.97%)
-                InferType: 17330us [17330us] (46.83%; 91.04%)
+InferType: 19948us [19948us] (50.28%; 50.28%)
+FoldScaleAxis: 19724us [10us] (49.72%; 49.72%)
+        FoldConstant: 19714us [1811us] (49.69%; 99.95%)
+                InferType: 17903us [17903us] (45.13%; 90.81%)
 </pre></div>
 </div>
 </div>
@@ -551,10 +551,10 @@ Refer to following sections and <a class="reference internal" href="../../refere
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Printing results of timing profile...
-InferType: 17388us [17388us] (47.56%; 47.56%)
-FoldScaleAxis: 19173us [5us] (52.44%; 52.44%)
-        FoldConstant: 19168us [1688us] (52.43%; 99.98%)
-                InferType: 17481us [17481us] (47.81%; 91.20%)
+InferType: 18155us [18155us] (48.05%; 48.05%)
+FoldScaleAxis: 19625us [7us] (51.95%; 51.95%)
+        FoldConstant: 19618us [1787us] (51.93%; 99.96%)
+                InferType: 17831us [17831us] (47.20%; 90.89%)
 </pre></div>
 </div>
 <p>Register empty list to clear existing instruments.</p>
diff --git a/docs/how_to/optimize_operators/opt_conv_cuda.html b/docs/how_to/optimize_operators/opt_conv_cuda.html
index 1fe4f81a30..cfd2880fee 100644
--- a/docs/how_to/optimize_operators/opt_conv_cuda.html
+++ b/docs/how_to/optimize_operators/opt_conv_cuda.html
@@ -575,7 +575,7 @@ latency of convolution.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Convolution: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">b</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">*</span> <span cl [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 54.126304 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Convolution: 41.779296 ms
 </pre></div>
 </div>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-optimize-operators-opt-conv-cuda-py">
diff --git a/docs/how_to/optimize_operators/opt_conv_tensorcore.html b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
index baf11a2c2c..a04e24fe95 100644
--- a/docs/how_to/optimize_operators/opt_conv_tensorcore.html
+++ b/docs/how_to/optimize_operators/opt_conv_tensorcore.html
@@ -867,7 +867,7 @@ be able to run on our build server</p>
     <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;conv2d with tensor core: </span><span class="si">%f</span><span class="s2"> ms&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">w</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span> <span class="o">* [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 6.691885 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>conv2d with tensor core: 7.157965 ms
 </pre></div>
 </div>
 </div>
diff --git a/docs/how_to/optimize_operators/opt_gemm.html b/docs/how_to/optimize_operators/opt_gemm.html
index 585da49595..8ba4266660 100644
--- a/docs/how_to/optimize_operators/opt_gemm.html
+++ b/docs/how_to/optimize_operators/opt_gemm.html
@@ -472,8 +472,8 @@ Then we write a baseline implementation, the simplest way to write a matrix mult
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Baseline: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.019105
-Baseline: 3.258741
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Numpy running time: 0.018668
+Baseline: 3.404827
 </pre></div>
 </div>
 <p>In TVM, we can always inspect lower level IR to debug or optimize our schedule.
@@ -532,7 +532,7 @@ fill 32 * 32 * sizeof(float) which is 4KB in the cache whose total size is 32KB
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt1: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.298000
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt1: 0.304255
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -589,7 +589,7 @@ vastly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt2: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.332539
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt2: 0.339157
 </pre></div>
 </div>
 <p>Here is the generated IR after vectorization.</p>
@@ -644,7 +644,7 @@ the access pattern for A matrix is more cache friendly.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt3: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.114561
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt3: 0.117516
 </pre></div>
 </div>
 <p>Here is the generated IR after loop permutation.</p>
@@ -721,7 +721,7 @@ flattening.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt4: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.108607
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt4: 0.109511
 </pre></div>
 </div>
 <p>Here is the generated IR after array packing.</p>
@@ -799,7 +799,7 @@ write to C when all the block results are ready.</p>
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt5: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">evaluator</span><span class="p">(</span><span class="n">a</span><span class="p">,</span> <span class="n">b</span><span class="p">,</span> <span class="n">c</span><span class="p">)</span><span class="o">.</span><span class="n">mean</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.111281
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt5: 0.110792
 </pre></div>
 </div>
 <p>Here is the generated IR after blocking.</p>
@@ -879,7 +879,7 @@ class Module:
 <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Opt6: </span><span class="si">%f</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">opt6_time</span><span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.146370
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Opt6: 0.146887
 </pre></div>
 </div>
 <p>Here is the generated IR after parallelization.</p>
diff --git a/docs/how_to/optimize_operators/sg_execution_times.html b/docs/how_to/optimize_operators/sg_execution_times.html
index e87d48ab53..beb6415a30 100644
--- a/docs/how_to/optimize_operators/sg_execution_times.html
+++ b/docs/how_to/optimize_operators/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-optimize-operators-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:34.377</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
+<p><strong>00:34.897</strong> total execution time for <strong>how_to_optimize_operators</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -349,15 +349,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_gemm.html#sphx-glr-how-to-optimize-operators-opt-gemm-py"><span class="std std-ref">How to optimize GEMM on CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_gemm.py</span></code>)</p></td>
-<td><p>00:31.833</p></td>
+<td><p>00:32.409</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="opt_conv_tensorcore.html#sphx-glr-how-to-optimize-operators-opt-conv-tensorcore-py"><span class="std std-ref">How to optimize convolution using TensorCores</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_tensorcore.py</span></code>)</p></td>
-<td><p>00:01.469</p></td>
+<td><p>00:01.437</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="opt_conv_cuda.html#sphx-glr-how-to-optimize-operators-opt-conv-cuda-py"><span class="std std-ref">How to optimize convolution on GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">opt_conv_cuda.py</span></code>)</p></td>
-<td><p>00:01.076</p></td>
+<td><p>00:01.052</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
index 0d5763b8b0..71bad71bed 100644
--- a/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
+++ b/docs/how_to/tune_with_autoscheduler/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autoscheduler-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>09:08.538</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
+<p><strong>09:18.502</strong> total execution time for <strong>how_to_tune_with_autoscheduler</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 85%" />
@@ -349,27 +349,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_layer_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py"><span class="std std-ref">Auto-scheduling a Convolution Layer for GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_layer_cuda.py</span></code>)</p></td>
-<td><p>05:29.796</p></td>
+<td><p>05:39.006</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-x86-py"><span class="std std-ref">Auto-scheduling a Neural Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_x86.py</span></code>)</p></td>
-<td><p>01:37.055</p></td>
+<td><p>01:38.823</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_cuda.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-cuda-py"><span class="std std-ref">Auto-scheduling a Neural Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_cuda.py</span></code>)</p></td>
-<td><p>01:04.667</p></td>
+<td><p>01:06.225</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_sparse_x86.html#sphx-glr-how-to-tune-with-autoscheduler-tune-sparse-x86-py"><span class="std std-ref">Auto-scheduling Sparse Matrix Multiplication on CPU with Custom Sketch Rule</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_sparse_x86.py</span></code>)</p></td>
-<td><p>00:30.964</p></td>
+<td><p>00:27.877</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_network_arm.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-arm-py"><span class="std std-ref">Auto-scheduling a Neural Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_arm.py</span></code>)</p></td>
-<td><p>00:13.513</p></td>
+<td><p>00:13.863</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_network_mali.html#sphx-glr-how-to-tune-with-autoscheduler-tune-network-mali-py"><span class="std std-ref">Auto-scheduling a Neural Network for mali GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_network_mali.py</span></code>)</p></td>
-<td><p>00:12.544</p></td>
+<td><p>00:12.707</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
index cc0639ac09..b1be55d2dc 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_conv2d_layer_cuda.html
@@ -507,13 +507,13 @@ class Module:
         bias_1 = T.match_buffer(bias, (1, 512, 1, 1))
         compute_1 = T.match_buffer(compute, (1, 512, 7, 7))
         blockIdx_x = T.env_thread(&quot;blockIdx.x&quot;)
-        T.launch_thread(blockIdx_x, 28)
-        conv2d_nchw = T.allocate([14], &quot;float32&quot;, &quot;local&quot;)
-        pad_temp_shared = T.allocate([72], &quot;float32&quot;, &quot;shared&quot;)
-        kernel_shared = T.allocate([3072], &quot;float32&quot;, &quot;shared&quot;)
+        T.launch_thread(blockIdx_x, 16)
+        conv2d_nchw = T.allocate([8], &quot;float32&quot;, &quot;local&quot;)
+        pad_temp_shared = T.allocate([1008], &quot;float32&quot;, &quot;shared&quot;)
+        kernel_shared = T.allocate([1536], &quot;float32&quot;, &quot;shared&quot;)
         threadIdx_x = T.env_thread(&quot;threadIdx.x&quot;)
-        T.launch_thread(threadIdx_x, 64)
-        conv2d_nchw_1 = T.buffer_decl((14,), data=conv2d_nchw, scope=&quot;local&quot;, align=32)
+        T.launch_thread(threadIdx_x, 196)
+        conv2d_nchw_1 = T.buffer_decl((1,), data=conv2d_nchw, scope=&quot;local&quot;, align=4)
         conv2d_nchw_1[0] = T.float32(0)
         conv2d_nchw_1[1] = T.float32(0)
         conv2d_nchw_1[2] = T.float32(0)
@@ -522,466 +522,40 @@ class Module:
         conv2d_nchw_1[5] = T.float32(0)
         conv2d_nchw_1[6] = T.float32(0)
         conv2d_nchw_1[7] = T.float32(0)
-        conv2d_nchw_1[8] = T.float32(0)
-        conv2d_nchw_1[9] = T.float32(0)
-        conv2d_nchw_1[10] = T.float32(0)
-        conv2d_nchw_1[11] = T.float32(0)
-        conv2d_nchw_1[12] = T.float32(0)
-        conv2d_nchw_1[13] = T.float32(0)
-        for rc_outer_outer, ry_outer_outer in T.grid(64, 3):
-            cse_var_2: T.int32 = rc_outer_outer * 72
-            cse_var_1: T.int32 = ry_outer_outer * 3
-            threadIdx_x_1 = T.env_thread(&quot;threadIdx.x&quot;)
-            pad_temp_shared_1 = T.buffer_decl((72,), data=pad_temp_shared, scope=&quot;shared&quot;)
-            with T.launch_thread(threadIdx_x_1, 64):
-                data_2 = T.buffer_decl((25088,), data=data_1.data)
-                if T.likely(threadIdx_x_1 &lt; 18):
-                    pad_temp_shared_1[threadIdx_x_1 * 4] = T.if_then_else(1 &lt;= ry_outer_outer + blockIdx_x % 7 and ry_outer_outer + blockIdx_x % 7 &lt; 8 and 1 &lt;= threadIdx_x_1 * 4 % 9 and threadIdx_x_1 * 4 % 9 &lt; 8, data_2[rc_outer_outer * 392 + threadIdx_x_1 * 4 // 9 * 49 + ry_outer_outer * 7 + blockIdx_x % 7 * 7 + threadIdx_x_1 * 4 % 9 - 8], T.float32(0))
-                if T.likely(threadIdx_x_1 &lt; 18):
-                    pad_temp_shared_1[threadIdx_x_1 * 4 + 1] = T.if_then_else(1 &lt;= ry_outer_outer + blockIdx_x % 7 and ry_outer_outer + blockIdx_x % 7 &lt; 8 and 1 &lt;= (threadIdx_x_1 * 4 + 1) % 9 and (threadIdx_x_1 * 4 + 1) % 9 &lt; 8, data_2[rc_outer_outer * 392 + (threadIdx_x_1 * 4 + 1) // 9 * 49 + ry_outer_outer * 7 + blockIdx_x % 7 * 7 + (threadIdx_x_1 * 4 + 1) % 9 - 8], T.float32(0))
-                if T.likely(threadIdx_x_1 &lt; 18):
-                    pad_temp_shared_1[threadIdx_x_1 * 4 + 2] = T.if_then_else(1 &lt;= ry_outer_outer + blockIdx_x % 7 and ry_outer_outer + blockIdx_x % 7 &lt; 8 and 1 &lt;= (threadIdx_x_1 * 4 + 2) % 9 and (threadIdx_x_1 * 4 + 2) % 9 &lt; 8, data_2[rc_outer_outer * 392 + (threadIdx_x_1 * 4 + 2) // 9 * 49 + ry_outer_outer * 7 + blockIdx_x % 7 * 7 + (threadIdx_x_1 * 4 + 2) % 9 - 8], T.float32(0))
-                if T.likely(threadIdx_x_1 &lt; 18):
-                    pad_temp_shared_1[threadIdx_x_1 * 4 + 3] = T.if_then_else(1 &lt;= ry_outer_outer + blockIdx_x % 7 and ry_outer_outer + blockIdx_x % 7 &lt; 8 and 1 &lt;= (threadIdx_x_1 * 4 + 3) % 9 and (threadIdx_x_1 * 4 + 3) % 9 &lt; 8, data_2[rc_outer_outer * 392 + (threadIdx_x_1 * 4 + 3) // 9 * 49 + ry_outer_outer * 7 + blockIdx_x % 7 * 7 + (threadIdx_x_1 * 4 + 3) % 9 - 8], T.float32(0))
-            threadIdx_x_2 = T.env_thread(&quot;threadIdx.x&quot;)
-            kernel_shared_1 = T.buffer_decl((3072,), data=kernel_shared, scope=&quot;shared&quot;)
-            kernel_2 = T.buffer_decl((2359296,), data=kernel_1.data)
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 64] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 64) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 128] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 128) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 192] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 36864]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 256] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 256) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 320] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 320) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 384] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 73728]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 448] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 448) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 512] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 512) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 576] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 110592]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 640] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 640) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 704] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 704) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 768] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 147456]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 832] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 832) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 896] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 896) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 960] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 184320]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1024] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1024) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1088] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1088) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1152] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 221184]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1216] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1216) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1280] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1280) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1344] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 258048]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1408] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1408) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1472] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1472) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1536] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 294912]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1600] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1600) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1664] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1664) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1728] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 331776]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1792] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1792) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1856] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1856) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1920] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 368640]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 1984] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 1984) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2048] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2048) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2112] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 405504]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2176] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2176) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2240] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2240) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2304] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 442368]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2368] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2368) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2432] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2432) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2496] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 479232]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2560] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2560) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2624] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2624) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2688] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 516096]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2752] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2752) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2816] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2816) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2880] = kernel_2[blockIdx_x // 7 * 589824 + threadIdx_x_2 // 24 * 4608 + cse_var_2 + threadIdx_x_2 % 24 // 3 * 9 + cse_var_1 + threadIdx_x_2 % 3 + 552960]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 2944] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 2944) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 16) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 1) % 3]
-            with T.launch_thread(threadIdx_x_2, 64):
-                kernel_shared_1[threadIdx_x_2 + 3008] = kernel_2[blockIdx_x // 7 * 589824 + (threadIdx_x_2 + 3008) // 24 * 4608 + cse_var_2 + (threadIdx_x_2 + 8) % 24 // 3 * 9 + cse_var_1 + (threadIdx_x_2 + 2) % 3]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[0] * kernel_shared_1[threadIdx_x * 48]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[9] * kernel_shared_1[threadIdx_x * 48 + 3]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[1] * kernel_shared_1[threadIdx_x * 48]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[10] * kernel_shared_1[threadIdx_x * 48 + 3]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 3]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 3]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 3]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 3]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 3]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[0] * kernel_shared_1[threadIdx_x * 48 + 24]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[9] * kernel_shared_1[threadIdx_x * 48 + 27]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[1] * kernel_shared_1[threadIdx_x * 48 + 24]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[10] * kernel_shared_1[threadIdx_x * 48 + 27]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48 + 24]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 27]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48 + 24]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 27]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48 + 24]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 27]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48 + 24]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 27]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48 + 24]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 27]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[1] * kernel_shared_1[threadIdx_x * 48 + 1]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[10] * kernel_shared_1[threadIdx_x * 48 + 4]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48 + 1]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 4]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48 + 1]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 4]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48 + 1]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 4]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48 + 1]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 4]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48 + 1]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 4]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[7] * kernel_shared_1[threadIdx_x * 48 + 1]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[16] * kernel_shared_1[threadIdx_x * 48 + 4]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[1] * kernel_shared_1[threadIdx_x * 48 + 25]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[10] * kernel_shared_1[threadIdx_x * 48 + 28]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48 + 25]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 28]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48 + 25]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 28]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48 + 25]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 28]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48 + 25]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 28]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48 + 25]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 28]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[7] * kernel_shared_1[threadIdx_x * 48 + 25]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[16] * kernel_shared_1[threadIdx_x * 48 + 28]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48 + 2]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 5]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48 + 2]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 5]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48 + 2]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 5]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48 + 2]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 5]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48 + 2]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 5]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[7] * kernel_shared_1[threadIdx_x * 48 + 2]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[16] * kernel_shared_1[threadIdx_x * 48 + 5]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[8] * kernel_shared_1[threadIdx_x * 48 + 2]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[17] * kernel_shared_1[threadIdx_x * 48 + 5]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[2] * kernel_shared_1[threadIdx_x * 48 + 26]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[11] * kernel_shared_1[threadIdx_x * 48 + 29]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[3] * kernel_shared_1[threadIdx_x * 48 + 26]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[12] * kernel_shared_1[threadIdx_x * 48 + 29]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[4] * kernel_shared_1[threadIdx_x * 48 + 26]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[13] * kernel_shared_1[threadIdx_x * 48 + 29]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[5] * kernel_shared_1[threadIdx_x * 48 + 26]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[14] * kernel_shared_1[threadIdx_x * 48 + 29]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[6] * kernel_shared_1[threadIdx_x * 48 + 26]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[15] * kernel_shared_1[threadIdx_x * 48 + 29]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[7] * kernel_shared_1[threadIdx_x * 48 + 26]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[16] * kernel_shared_1[threadIdx_x * 48 + 29]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[8] * kernel_shared_1[threadIdx_x * 48 + 26]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[17] * kernel_shared_1[threadIdx_x * 48 + 29]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[18] * kernel_shared_1[threadIdx_x * 48 + 6]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[27] * kernel_shared_1[threadIdx_x * 48 + 9]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[19] * kernel_shared_1[threadIdx_x * 48 + 6]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[28] * kernel_shared_1[threadIdx_x * 48 + 9]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 6]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 9]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 6]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 9]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 6]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 9]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 6]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 9]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 6]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 9]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[18] * kernel_shared_1[threadIdx_x * 48 + 30]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[27] * kernel_shared_1[threadIdx_x * 48 + 33]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[19] * kernel_shared_1[threadIdx_x * 48 + 30]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[28] * kernel_shared_1[threadIdx_x * 48 + 33]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 30]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 33]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 30]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 33]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 30]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 33]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 30]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 33]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 30]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 33]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[19] * kernel_shared_1[threadIdx_x * 48 + 7]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[28] * kernel_shared_1[threadIdx_x * 48 + 10]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 7]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 10]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 7]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 10]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 7]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 10]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 7]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 10]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 7]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 10]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[25] * kernel_shared_1[threadIdx_x * 48 + 7]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[34] * kernel_shared_1[threadIdx_x * 48 + 10]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[19] * kernel_shared_1[threadIdx_x * 48 + 31]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[28] * kernel_shared_1[threadIdx_x * 48 + 34]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 31]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 34]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 31]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 34]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 31]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 34]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 31]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 34]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 31]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 34]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[25] * kernel_shared_1[threadIdx_x * 48 + 31]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[34] * kernel_shared_1[threadIdx_x * 48 + 34]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 8]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 11]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 8]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 11]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 8]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 11]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 8]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 11]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 8]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 11]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[25] * kernel_shared_1[threadIdx_x * 48 + 8]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[34] * kernel_shared_1[threadIdx_x * 48 + 11]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[26] * kernel_shared_1[threadIdx_x * 48 + 8]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[35] * kernel_shared_1[threadIdx_x * 48 + 11]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[20] * kernel_shared_1[threadIdx_x * 48 + 32]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[29] * kernel_shared_1[threadIdx_x * 48 + 35]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[21] * kernel_shared_1[threadIdx_x * 48 + 32]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[30] * kernel_shared_1[threadIdx_x * 48 + 35]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[22] * kernel_shared_1[threadIdx_x * 48 + 32]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[31] * kernel_shared_1[threadIdx_x * 48 + 35]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[23] * kernel_shared_1[threadIdx_x * 48 + 32]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[32] * kernel_shared_1[threadIdx_x * 48 + 35]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[24] * kernel_shared_1[threadIdx_x * 48 + 32]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[33] * kernel_shared_1[threadIdx_x * 48 + 35]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[25] * kernel_shared_1[threadIdx_x * 48 + 32]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[34] * kernel_shared_1[threadIdx_x * 48 + 35]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[26] * kernel_shared_1[threadIdx_x * 48 + 32]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[35] * kernel_shared_1[threadIdx_x * 48 + 35]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[36] * kernel_shared_1[threadIdx_x * 48 + 12]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[45] * kernel_shared_1[threadIdx_x * 48 + 15]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[37] * kernel_shared_1[threadIdx_x * 48 + 12]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[46] * kernel_shared_1[threadIdx_x * 48 + 15]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 12]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 15]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 12]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 15]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 12]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 15]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 12]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 15]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 12]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 15]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[36] * kernel_shared_1[threadIdx_x * 48 + 36]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[45] * kernel_shared_1[threadIdx_x * 48 + 39]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[37] * kernel_shared_1[threadIdx_x * 48 + 36]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[46] * kernel_shared_1[threadIdx_x * 48 + 39]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 36]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 39]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 36]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 39]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 36]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 39]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 36]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 39]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 36]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 39]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[37] * kernel_shared_1[threadIdx_x * 48 + 13]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[46] * kernel_shared_1[threadIdx_x * 48 + 16]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 13]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 16]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 13]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 16]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 13]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 16]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 13]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 16]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 13]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 16]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[43] * kernel_shared_1[threadIdx_x * 48 + 13]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[52] * kernel_shared_1[threadIdx_x * 48 + 16]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[37] * kernel_shared_1[threadIdx_x * 48 + 37]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[46] * kernel_shared_1[threadIdx_x * 48 + 40]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 37]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 40]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 37]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 40]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 37]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 40]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 37]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 40]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 37]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 40]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[43] * kernel_shared_1[threadIdx_x * 48 + 37]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[52] * kernel_shared_1[threadIdx_x * 48 + 40]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 14]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 17]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 14]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 17]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 14]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 17]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 14]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 17]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 14]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 17]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[43] * kernel_shared_1[threadIdx_x * 48 + 14]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[52] * kernel_shared_1[threadIdx_x * 48 + 17]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[44] * kernel_shared_1[threadIdx_x * 48 + 14]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[53] * kernel_shared_1[threadIdx_x * 48 + 17]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[38] * kernel_shared_1[threadIdx_x * 48 + 38]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[47] * kernel_shared_1[threadIdx_x * 48 + 41]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[39] * kernel_shared_1[threadIdx_x * 48 + 38]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[48] * kernel_shared_1[threadIdx_x * 48 + 41]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[40] * kernel_shared_1[threadIdx_x * 48 + 38]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[49] * kernel_shared_1[threadIdx_x * 48 + 41]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[41] * kernel_shared_1[threadIdx_x * 48 + 38]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[50] * kernel_shared_1[threadIdx_x * 48 + 41]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[42] * kernel_shared_1[threadIdx_x * 48 + 38]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[51] * kernel_shared_1[threadIdx_x * 48 + 41]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[43] * kernel_shared_1[threadIdx_x * 48 + 38]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[52] * kernel_shared_1[threadIdx_x * 48 + 41]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[44] * kernel_shared_1[threadIdx_x * 48 + 38]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[53] * kernel_shared_1[threadIdx_x * 48 + 41]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[54] * kernel_shared_1[threadIdx_x * 48 + 18]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[63] * kernel_shared_1[threadIdx_x * 48 + 21]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[55] * kernel_shared_1[threadIdx_x * 48 + 18]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[64] * kernel_shared_1[threadIdx_x * 48 + 21]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 18]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 21]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 18]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 21]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 18]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 21]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 18]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 21]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 18]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 21]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[54] * kernel_shared_1[threadIdx_x * 48 + 42]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[63] * kernel_shared_1[threadIdx_x * 48 + 45]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[55] * kernel_shared_1[threadIdx_x * 48 + 42]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[64] * kernel_shared_1[threadIdx_x * 48 + 45]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 42]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 45]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 42]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 45]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 42]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 45]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 42]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 45]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 42]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 45]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[55] * kernel_shared_1[threadIdx_x * 48 + 19]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[64] * kernel_shared_1[threadIdx_x * 48 + 22]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 19]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 22]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 19]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 22]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 19]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 22]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 19]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 22]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 19]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 22]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[61] * kernel_shared_1[threadIdx_x * 48 + 19]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[70] * kernel_shared_1[threadIdx_x * 48 + 22]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[55] * kernel_shared_1[threadIdx_x * 48 + 43]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[64] * kernel_shared_1[threadIdx_x * 48 + 46]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 43]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 46]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 43]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 46]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 43]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 46]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 43]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 46]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 43]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 46]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[61] * kernel_shared_1[threadIdx_x * 48 + 43]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[70] * kernel_shared_1[threadIdx_x * 48 + 46]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 20]
-            conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 23]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 20]
-            conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 23]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 20]
-            conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 23]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 20]
-            conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 23]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 20]
-            conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 23]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[61] * kernel_shared_1[threadIdx_x * 48 + 20]
-            conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[70] * kernel_shared_1[threadIdx_x * 48 + 23]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[62] * kernel_shared_1[threadIdx_x * 48 + 20]
-            conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[71] * kernel_shared_1[threadIdx_x * 48 + 23]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[56] * kernel_shared_1[threadIdx_x * 48 + 44]
-            conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[65] * kernel_shared_1[threadIdx_x * 48 + 47]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[57] * kernel_shared_1[threadIdx_x * 48 + 44]
-            conv2d_nchw_1[8] = conv2d_nchw_1[8] + pad_temp_shared_1[66] * kernel_shared_1[threadIdx_x * 48 + 47]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[58] * kernel_shared_1[threadIdx_x * 48 + 44]
-            conv2d_nchw_1[9] = conv2d_nchw_1[9] + pad_temp_shared_1[67] * kernel_shared_1[threadIdx_x * 48 + 47]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[59] * kernel_shared_1[threadIdx_x * 48 + 44]
-            conv2d_nchw_1[10] = conv2d_nchw_1[10] + pad_temp_shared_1[68] * kernel_shared_1[threadIdx_x * 48 + 47]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[60] * kernel_shared_1[threadIdx_x * 48 + 44]
-            conv2d_nchw_1[11] = conv2d_nchw_1[11] + pad_temp_shared_1[69] * kernel_shared_1[threadIdx_x * 48 + 47]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[61] * kernel_shared_1[threadIdx_x * 48 + 44]
-            conv2d_nchw_1[12] = conv2d_nchw_1[12] + pad_temp_shared_1[70] * kernel_shared_1[threadIdx_x * 48 + 47]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[62] * kernel_shared_1[threadIdx_x * 48 + 44]
-            conv2d_nchw_1[13] = conv2d_nchw_1[13] + pad_temp_shared_1[71] * kernel_shared_1[threadIdx_x * 48 + 47]
-        for i1_inner, i3_inner in T.grid(2, 7):
-            compute_2 = T.buffer_decl((25088,), data=compute_1.data)
-            bias_2 = T.buffer_decl((512,), data=bias_1.data)
-            compute_2[blockIdx_x // 7 * 6272 + threadIdx_x * 98 + i1_inner * 49 + blockIdx_x % 7 * 7 + i3_inner] = T.max(conv2d_nchw_1[i1_inner * 7 + i3_inner] + bias_2[blockIdx_x // 7 * 128 + threadIdx_x * 2 + i1_inner], T.float32(0))
+        for rc_outer_outer, rx_outer_outer in T.grid(32, 3):
+            pad_temp_shared_1 = T.buffer_decl((1008,), data=pad_temp_shared, scope=&quot;shared&quot;)
+            for ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer in range(6):
+                threadIdx_x_1 = T.env_thread(&quot;threadIdx.x&quot;)
+                T.launch_thread(threadIdx_x_1, 196)
+                if T.likely(ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 7 + threadIdx_x_1 // 28 &lt; 36):
+                    data_2 = T.buffer_decl((25088,), data=data_1.data)
+                    pad_temp_shared_1[ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 196 + threadIdx_x_1] = T.if_then_else(1 &lt;= (threadIdx_x_1 // 7 + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) % 9 and (threadIdx_x_1 // 7 + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) % 9 &lt; 8 and 1 &lt;= rx_outer_outer + threadIdx_x_1 % 7 and rx_outer_outer + threadIdx_x_1 % 7 &lt; 8, data_2[rc_outer_outer * 784 + (ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 28 + threadIdx_x_1 // 7) // 9  [...]
+            kernel_shared_1 = T.buffer_decl((1536,), data=kernel_shared, scope=&quot;shared&quot;)
+            for ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer in range(8):
+                threadIdx_x_1 = T.env_thread(&quot;threadIdx.x&quot;)
+                T.launch_thread(threadIdx_x_1, 196)
+                if T.likely(ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 49 + threadIdx_x_1 // 4 &lt; 384):
+                    kernel_2 = T.buffer_decl((2359296,), data=kernel_1.data)
+                    kernel_shared_1[ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 196 + threadIdx_x_1] = kernel_2[blockIdx_x * 147456 + (ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 49 + threadIdx_x_1 // 4) // 12 * 4608 + rc_outer_outer * 144 + (ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 4 + threadIdx_x_1) % 48 // 3 * 9 + (threadIdx_x_1 + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) % 3 * 3 + rx_outer_outer]
+            for rc_outer_inner, ry_outer_inner, rc_inner in T.grid(8, 3, 2):
+                conv2d_nchw_1[0] = conv2d_nchw_1[0] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner]
+                conv2d_nchw_1[1] = conv2d_nchw_1[1] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 192]
+                conv2d_nchw_1[2] = conv2d_nchw_1[2] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 384]
+                conv2d_nchw_1[3] = conv2d_nchw_1[3] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 576]
+                conv2d_nchw_1[4] = conv2d_nchw_1[4] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 768]
+                conv2d_nchw_1[5] = conv2d_nchw_1[5] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 960]
+                conv2d_nchw_1[6] = conv2d_nchw_1[6] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 1152]
+                conv2d_nchw_1[7] = conv2d_nchw_1[7] + pad_temp_shared_1[rc_outer_inner * 126 + rc_inner * 63 + ry_outer_inner * 7 + threadIdx_x % 49] * kernel_shared_1[threadIdx_x // 49 * 48 + rc_outer_inner * 6 + rc_inner * 3 + ry_outer_inner + 1344]
+        compute_2 = T.buffer_decl((25088,), data=compute_1.data)
+        bias_2 = T.buffer_decl((512,), data=bias_1.data)
+        compute_2[blockIdx_x * 1568 + threadIdx_x] = T.max(conv2d_nchw_1[0] + bias_2[blockIdx_x * 32 + threadIdx_x // 49], T.float32(0))
+        compute_2[blockIdx_x * 1568 + threadIdx_x + 196] = T.max(conv2d_nchw_1[1] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 4], T.float32(0))
+        compute_2[blockIdx_x * 1568 + threadIdx_x + 392] = T.max(conv2d_nchw_1[2] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 8], T.float32(0))
+        compute_2[blockIdx_x * 1568 + threadIdx_x + 588] = T.max(conv2d_nchw_1[3] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 12], T.float32(0))
+        compute_2[blockIdx_x * 1568 + threadIdx_x + 784] = T.max(conv2d_nchw_1[4] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 16], T.float32(0))
+        compute_2[blockIdx_x * 1568 + threadIdx_x + 980] = T.max(conv2d_nchw_1[5] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 20], T.float32(0))
+        compute_2[blockIdx_x * 1568 + threadIdx_x + 1176] = T.max(conv2d_nchw_1[6] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 24], T.float32(0))
+        compute_2[blockIdx_x * 1568 + threadIdx_x + 1372] = T.max(conv2d_nchw_1[7] + bias_2[blockIdx_x * 32 + threadIdx_x // 49 + 28], T.float32(0))
 </pre></div>
 </div>
 </div>
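
For readers following along with the tutorial rather than the diff: the lowered TIR shown above can be reproduced from an auto-scheduler tuning log. A minimal sketch, assuming `task` is the `auto_scheduler.SearchTask` for this conv2d layer and `conv2d.json` is a hypothetical name for the file of measured tuning records:

    import tvm
    from tvm import auto_scheduler

    # Assumed: `task` was created earlier in the tutorial and "conv2d.json"
    # (hypothetical name) holds the auto-scheduler's measurement records.
    sch, args = task.apply_best("conv2d.json")      # rebuild the best-found schedule
    print(tvm.lower(sch, args, simple_mode=True))   # dumps TIR like the module above
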
@@ -1015,7 +589,7 @@ class Module:
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.354 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 0.386 ms
 </pre></div>
 </div>
 </div>
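
A number like the 0.386 ms above is typically obtained with TVM's `time_evaluator`. A minimal sketch, assuming `sch` and `args` come from `task.apply_best` as in the previous snippet and that CUDA device 0 is available:

    import numpy as np
    import tvm

    dev = tvm.cuda(0)
    func = tvm.build(sch, args, target="cuda")
    # Allocate random inputs matching `args` (data, kernel, bias, output).
    tensors = [
        tvm.nd.array(
            np.random.uniform(size=[int(d) for d in a.shape]).astype(a.dtype), dev
        )
        for a in args
    ]
    # min_repeat_ms amortizes launch overhead over many kernel runs.
    evaluator = func.time_evaluator(func.entry_name, dev, min_repeat_ms=500)
    print("Execution time: %.3f ms" % (np.median(evaluator(*tensors).results) * 1000))
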
@@ -1045,35 +619,35 @@ conv2d_nchw_nn_o_o_i, conv2d_nchw_nn_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o
 conv2d_nchw_nn_o_o_o_i, conv2d_nchw_nn_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_i, factor=1)
 conv2d_nchw_nn_o_o_o_o, conv2d_nchw_nn_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_nn_o_o_o_i, factor=1)
 conv2d_nchw_ff_o_i, conv2d_nchw_ff_i = s[conv2d_nchw].split(conv2d_nchw_ff, factor=1)
-conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=2)
-conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=64)
-conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=1)
+conv2d_nchw_ff_o_o_i, conv2d_nchw_ff_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_i, factor=1)
+conv2d_nchw_ff_o_o_o_i, conv2d_nchw_ff_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_i, factor=4)
+conv2d_nchw_ff_o_o_o_o, conv2d_nchw_ff_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_ff_o_o_o_i, factor=8)
 conv2d_nchw_yy_o_i, conv2d_nchw_yy_i = s[conv2d_nchw].split(conv2d_nchw_yy, factor=1)
 conv2d_nchw_yy_o_o_i, conv2d_nchw_yy_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_i, factor=1)
-conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=1)
+conv2d_nchw_yy_o_o_o_i, conv2d_nchw_yy_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_i, factor=7)
 conv2d_nchw_yy_o_o_o_o, conv2d_nchw_yy_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_yy_o_o_o_i, factor=1)
 conv2d_nchw_xx_o_i, conv2d_nchw_xx_i = s[conv2d_nchw].split(conv2d_nchw_xx, factor=1)
-conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=7)
-conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=1)
+conv2d_nchw_xx_o_o_i, conv2d_nchw_xx_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_i, factor=1)
+conv2d_nchw_xx_o_o_o_i, conv2d_nchw_xx_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_i, factor=7)
 conv2d_nchw_xx_o_o_o_o, conv2d_nchw_xx_o_o_o_i = s[conv2d_nchw].split(conv2d_nchw_xx_o_o_o_i, factor=1)
 conv2d_nchw_rc_o_i, conv2d_nchw_rc_i = s[conv2d_nchw].split(conv2d_nchw_rc, factor=2)
-conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=4)
+conv2d_nchw_rc_o_o, conv2d_nchw_rc_o_i = s[conv2d_nchw].split(conv2d_nchw_rc_o_i, factor=8)
 conv2d_nchw_ry_o_i, conv2d_nchw_ry_i = s[conv2d_nchw].split(conv2d_nchw_ry, factor=1)
-conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=1)
+conv2d_nchw_ry_o_o, conv2d_nchw_ry_o_i = s[conv2d_nchw].split(conv2d_nchw_ry_o_i, factor=3)
 conv2d_nchw_rx_o_i, conv2d_nchw_rx_i = s[conv2d_nchw].split(conv2d_nchw_rx, factor=1)
-conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=3)
+conv2d_nchw_rx_o_o, conv2d_nchw_rx_o_i = s[conv2d_nchw].split(conv2d_nchw_rx_o_i, factor=1)
 s[conv2d_nchw].reorder(conv2d_nchw_nn_o_o_o_o, conv2d_nchw_ff_o_o_o_o, conv2d_nchw_yy_o_o_o_o, conv2d_nchw_xx_o_o_o_o, conv2d_nchw_nn_o_o_o_i, conv2d_nchw_ff_o_o_o_i, conv2d_nchw_yy_o_o_o_i, conv2d_nchw_xx_o_o_o_i, conv2d_nchw_nn_o_o_i, conv2d_nchw_ff_o_o_i, conv2d_nchw_yy_o_o_i, conv2d_nchw_xx_o_o_i, conv2d_nchw_rc_o_o, conv2d_nchw_ry_o_o, conv2d_nchw_rx_o_o, conv2d_nchw_rc_o_i, conv2d_nchw_ry_o_i, conv2d_nchw_rx_o_i, conv2d_nchw_nn_o_i, conv2d_nchw_ff_o_i, conv2d_nchw_yy_o_i, conv2d_nc [...]
 compute_i0_o_i, compute_i0_i = s[compute].split(compute_i0, factor=1)
 compute_i0_o_o_i, compute_i0_o_i = s[compute].split(compute_i0_o_i, factor=1)
 compute_i0_o_o_o, compute_i0_o_o_i = s[compute].split(compute_i0_o_o_i, factor=1)
-compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=2)
-compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=64)
-compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=1)
+compute_i1_o_i, compute_i1_i = s[compute].split(compute_i1, factor=1)
+compute_i1_o_o_i, compute_i1_o_i = s[compute].split(compute_i1_o_i, factor=4)
+compute_i1_o_o_o, compute_i1_o_o_i = s[compute].split(compute_i1_o_o_i, factor=8)
 compute_i2_o_i, compute_i2_i = s[compute].split(compute_i2, factor=1)
-compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=1)
+compute_i2_o_o_i, compute_i2_o_i = s[compute].split(compute_i2_o_i, factor=7)
 compute_i2_o_o_o, compute_i2_o_o_i = s[compute].split(compute_i2_o_o_i, factor=1)
-compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=7)
-compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=1)
+compute_i3_o_i, compute_i3_i = s[compute].split(compute_i3, factor=1)
+compute_i3_o_o_i, compute_i3_o_i = s[compute].split(compute_i3_o_i, factor=7)
 compute_i3_o_o_o, compute_i3_o_o_i = s[compute].split(compute_i3_o_o_i, factor=1)
 s[compute].reorder(compute_i0_o_o_o, compute_i1_o_o_o, compute_i2_o_o_o, compute_i3_o_o_o, compute_i0_o_o_i, compute_i1_o_o_i, compute_i2_o_o_i, compute_i3_o_o_i, compute_i0_o_i, compute_i1_o_i, compute_i2_o_i, compute_i3_o_i, compute_i0_i, compute_i1_i, compute_i2_i, compute_i3_i)
 s[conv2d_nchw].compute_at(s[compute], compute_i3_o_i)
@@ -1093,14 +667,14 @@ s[compute].bind(compute_i0_o_i_i1_o_i_fused_i2_o_i_fused_i3_o_i_fused, te.thread
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[kernel_shared].fuse(kernel_shared_ax0, kernel_shared_ax1, kernel_shared_ax2, kernel_shared_ax3)
 kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[kernel_shared].vectorize(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[kernel_shared].split(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=196)
 s[kernel_shared].bind(kernel_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
 pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused = s[pad_temp_shared].fuse(pad_temp_shared_ax0, pad_temp_shared_ax1, pad_temp_shared_ax2, pad_temp_shared_ax3)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=4)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused, factor=1)
 s[pad_temp_shared].vectorize(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_i)
-pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=64)
+pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_o, pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i = s[pad_temp_shared].split(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o, factor=196)
 s[pad_temp_shared].bind(pad_temp_shared_ax0_ax1_fused_ax2_fused_ax3_fused_o_i, te.thread_axis(&quot;threadIdx.x&quot;))
-s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 512)
+s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;auto_unroll_max_step&quot;, 0)
 s[conv2d_nchw].pragma(conv2d_nchw_nn_o_o_o_o, &quot;unroll_explicit&quot;, True)
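
The schedule dump above is built almost entirely from three primitives: `split` to tile loop axes, `reorder` to arrange the resulting loops, and `bind` to map outer loops onto `blockIdx`/`threadIdx` (the updated schedule also sets `auto_unroll_max_step` to 0, i.e. it disables aggressive auto-unrolling, which is why the new CUDA kernel below keeps its reduction loops). A toy sketch of the same split-and-bind pattern on a trivial elementwise kernel, not the tutorial's own code:

    import tvm
    from tvm import te

    n = 1024
    A = te.placeholder((n,), name="A")
    B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
    s = te.create_schedule(B.op)
    # Tile the single axis; the factor mirrors the 196-thread blocks above.
    bx, tx = s[B].split(B.op.axis[0], factor=196)
    s[B].bind(bx, te.thread_axis("blockIdx.x"))
    s[B].bind(tx, te.thread_axis("threadIdx.x"))
    print(tvm.lower(s, [A, B], simple_mode=True))

The auto-scheduler simply applies this pattern many times over, once per axis of the conv2d and ReLU stages, with the tuned factors recorded in the log.
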
 
 CUDA source code:
@@ -1118,10 +692,10 @@ CUDA source code:
   #define int64_t long long
   #define uint64_t unsigned long long
 #endif
-extern &quot;C&quot; __global__ void __launch_bounds__(64) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
-  float conv2d_nchw[14];
-  __shared__ float pad_temp_shared[72];
-  __shared__ float kernel_shared[3072];
+extern &quot;C&quot; __global__ void __launch_bounds__(196) default_function_kernel0(float* __restrict__ data, float* __restrict__ kernel, float* __restrict__ compute, float* __restrict__ bias) {
+  float conv2d_nchw[8];
+  __shared__ float pad_temp_shared[1008];
+  __shared__ float kernel_shared[1536];
   conv2d_nchw[0] = 0.000000e+00f;
   conv2d_nchw[1] = 0.000000e+00f;
   conv2d_nchw[2] = 0.000000e+00f;
@@ -1130,419 +704,44 @@ extern &quot;C&quot; __global__ void __launch_bounds__(64) default_function_kern
   conv2d_nchw[5] = 0.000000e+00f;
   conv2d_nchw[6] = 0.000000e+00f;
   conv2d_nchw[7] = 0.000000e+00f;
-  conv2d_nchw[8] = 0.000000e+00f;
-  conv2d_nchw[9] = 0.000000e+00f;
-  conv2d_nchw[10] = 0.000000e+00f;
-  conv2d_nchw[11] = 0.000000e+00f;
-  conv2d_nchw[12] = 0.000000e+00f;
-  conv2d_nchw[13] = 0.000000e+00f;
-  for (int rc_outer_outer = 0; rc_outer_outer &lt; 64; ++rc_outer_outer) {
-    for (int ry_outer_outer = 0; ry_outer_outer &lt; 3; ++ry_outer_outer) {
+  for (int rc_outer_outer = 0; rc_outer_outer &lt; 32; ++rc_outer_outer) {
+    for (int rx_outer_outer = 0; rx_outer_outer &lt; 3; ++rx_outer_outer) {
       __syncthreads();
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[(((int)threadIdx.x) * 4)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= ((((int)threadIdx.x) * 4) % 9))) &amp;&amp; (((((int)threadIdx.x) * 4) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + (((((int)threadIdx.x) * 4) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + ((((int)threadIdx.x) * 4) % 9)) - 8)] : 0.000000e+00f);
+      for (int ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer = 0; ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer &lt; 6; ++ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) {
+        if (((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 7) + (((int)threadIdx.x) / 28)) &lt; 36) {
+          pad_temp_shared[((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer * 196) + ((int)threadIdx.x))] = (((((1 &lt;= (((((int)threadIdx.x) / 7) + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) % 9)) &amp;&amp; ((((((int)threadIdx.x) / 7) + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer) % 9) &lt; 8)) &amp;&amp; (1 &lt;= (rx_outer_outer + (((int)threadIdx.x) % 7)))) &amp;&amp; ((rx_outer_outer + (((int)threadIdx.x) % 7)) &lt; 8)) ? data[((((((rc_outer_outer * 784) + ((((ax0_ax1_fused_ax2 [...]
+        }
       }
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[((((int)threadIdx.x) * 4) + 1)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 1) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 1) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 1) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 1) % 9)) - 8)] : 0.000000e+00f);
+      for (int ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 = 0; ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 &lt; 8; ++ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1) {
+        if (((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 * 49) + (((int)threadIdx.x) &gt;&gt; 2)) &lt; 384) {
+          kernel_shared[((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 * 196) + ((int)threadIdx.x))] = kernel[((((((((int)blockIdx.x) * 147456) + ((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 * 49) + (((int)threadIdx.x) &gt;&gt; 2)) / 12) * 4608)) + (rc_outer_outer * 144)) + (((((ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1 * 4) + ((int)threadIdx.x)) % 48) / 3) * 9)) + (((((int)threadIdx.x) + ax0_ax1_fused_ax2_fused_ax3_fused_outer_outer_1) % 3) * 3)) + rx_outer_outer)];
+        }
       }
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[((((int)threadIdx.x) * 4) + 2)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 2) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 2) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 2) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 2) % 9)) - 8)] : 0.000000e+00f);
-      }
-      if (((int)threadIdx.x) &lt; 18) {
-        pad_temp_shared[((((int)threadIdx.x) * 4) + 3)] = (((((1 &lt;= (ry_outer_outer + (((int)blockIdx.x) % 7))) &amp;&amp; ((ry_outer_outer + (((int)blockIdx.x) % 7)) &lt; 8)) &amp;&amp; (1 &lt;= (((((int)threadIdx.x) * 4) + 3) % 9))) &amp;&amp; ((((((int)threadIdx.x) * 4) + 3) % 9) &lt; 8)) ? data[((((((rc_outer_outer * 392) + ((((((int)threadIdx.x) * 4) + 3) / 9) * 49)) + (ry_outer_outer * 7)) + ((((int)blockIdx.x) % 7) * 7)) + (((((int)threadIdx.x) * 4) + 3) % 9)) - 8)] : 0.000000e+00f);
-      }
-      kernel_shared[((int)threadIdx.x)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 64)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 64) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 128)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 128) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 192)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 36864)];
-      kernel_shared[(((int)threadIdx.x) + 256)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 256) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 320)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 320) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 384)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 73728)];
-      kernel_shared[(((int)threadIdx.x) + 448)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 448) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 512)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 512) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 576)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 110592)];
-      kernel_shared[(((int)threadIdx.x) + 640)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 640) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 704)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 704) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 768)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 147456)];
-      kernel_shared[(((int)threadIdx.x) + 832)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 832) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 896)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 896) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 960)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 184320)];
-      kernel_shared[(((int)threadIdx.x) + 1024)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1024) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1088)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1088) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1152)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 221184)];
-      kernel_shared[(((int)threadIdx.x) + 1216)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1216) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1280)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1280) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1344)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 258048)];
-      kernel_shared[(((int)threadIdx.x) + 1408)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1408) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1472)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1472) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1536)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 294912)];
-      kernel_shared[(((int)threadIdx.x) + 1600)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1600) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1664)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1664) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1728)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 331776)];
-      kernel_shared[(((int)threadIdx.x) + 1792)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1792) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1856)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1856) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 1920)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 368640)];
-      kernel_shared[(((int)threadIdx.x) + 1984)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 1984) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2048)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2048) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2112)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 405504)];
-      kernel_shared[(((int)threadIdx.x) + 2176)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2176) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2240)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2240) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2304)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 442368)];
-      kernel_shared[(((int)threadIdx.x) + 2368)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2368) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2432)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2432) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2496)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 479232)];
-      kernel_shared[(((int)threadIdx.x) + 2560)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2560) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2624)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2624) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2688)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 516096)];
-      kernel_shared[(((int)threadIdx.x) + 2752)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2752) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2816)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2816) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 2880)] = kernel[((((((((((int)blockIdx.x) / 7) * 589824) + ((((int)threadIdx.x) / 24) * 4608)) + (rc_outer_outer * 72)) + (((((int)threadIdx.x) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + (((int)threadIdx.x) % 3)) + 552960)];
-      kernel_shared[(((int)threadIdx.x) + 2944)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 2944) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 16) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 1) % 3))];
-      kernel_shared[(((int)threadIdx.x) + 3008)] = kernel[(((((((((int)blockIdx.x) / 7) * 589824) + (((((int)threadIdx.x) + 3008) / 24) * 4608)) + (rc_outer_outer * 72)) + ((((((int)threadIdx.x) + 8) % 24) / 3) * 9)) + (ry_outer_outer * 3)) + ((((int)threadIdx.x) + 2) % 3))];
       __syncthreads();
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[0] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[1] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[2] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[3] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[4] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[5] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[6] * kernel_shared[(((int)threadIdx.x) * 48)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 3)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[0] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[9] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 24)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 27)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 1)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 4)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[1] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[10] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 25)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 28)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 2)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 5)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[2] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[11] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[3] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[12] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[4] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[13] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[5] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[14] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[6] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[15] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[7] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[16] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[8] * kernel_shared[((((int)threadIdx.x) * 48) + 26)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[17] * kernel_shared[((((int)threadIdx.x) * 48) + 29)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 6)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 9)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[18] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[27] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 30)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 33)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 7)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 10)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[19] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[28] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 31)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 34)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 8)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 11)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[20] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[29] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[21] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[30] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[22] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[31] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[23] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[32] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[24] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[33] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[25] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[34] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[26] * kernel_shared[((((int)threadIdx.x) * 48) + 32)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[35] * kernel_shared[((((int)threadIdx.x) * 48) + 35)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 12)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 15)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[36] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[45] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 36)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 39)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 13)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 16)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[37] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[46] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 37)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 40)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 14)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 17)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[38] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[47] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[39] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[48] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[40] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[49] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[41] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[50] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[42] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[51] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[43] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[52] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[44] * kernel_shared[((((int)threadIdx.x) * 48) + 38)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[53] * kernel_shared[((((int)threadIdx.x) * 48) + 41)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 18)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 21)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[54] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[63] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 42)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 45)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 19)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 22)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[55] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[64] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 43)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 46)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 20)]));
-      conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 23)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[56] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[65] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[57] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[8] = (conv2d_nchw[8] + (pad_temp_shared[66] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[58] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[9] = (conv2d_nchw[9] + (pad_temp_shared[67] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[59] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[10] = (conv2d_nchw[10] + (pad_temp_shared[68] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[60] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[11] = (conv2d_nchw[11] + (pad_temp_shared[69] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[61] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[12] = (conv2d_nchw[12] + (pad_temp_shared[70] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[62] * kernel_shared[((((int)threadIdx.x) * 48) + 44)]));
-      conv2d_nchw[13] = (conv2d_nchw[13] + (pad_temp_shared[71] * kernel_shared[((((int)threadIdx.x) * 48) + 47)]));
-    }
-  }
-  for (int i1_inner = 0; i1_inner &lt; 2; ++i1_inner) {
-    for (int i3_inner = 0; i3_inner &lt; 7; ++i3_inner) {
-      compute[((((((((int)blockIdx.x) / 7) * 6272) + (((int)threadIdx.x) * 98)) + (i1_inner * 49)) + ((((int)blockIdx.x) % 7) * 7)) + i3_inner)] = max((conv2d_nchw[((i1_inner * 7) + i3_inner)] + bias[((((((int)blockIdx.x) / 7) * 128) + (((int)threadIdx.x) * 2)) + i1_inner)]), 0.000000e+00f);
+      for (int rc_outer_inner = 0; rc_outer_inner &lt; 8; ++rc_outer_inner) {
+        for (int ry_outer_inner = 0; ry_outer_inner &lt; 3; ++ry_outer_inner) {
+          for (int rc_inner = 0; rc_inner &lt; 2; ++rc_inner) {
+            conv2d_nchw[0] = (conv2d_nchw[0] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[(((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner)]));
+            conv2d_nchw[1] = (conv2d_nchw[1] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 192)]));
+            conv2d_nchw[2] = (conv2d_nchw[2] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 384)]));
+            conv2d_nchw[3] = (conv2d_nchw[3] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 576)]));
+            conv2d_nchw[4] = (conv2d_nchw[4] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 768)]));
+            conv2d_nchw[5] = (conv2d_nchw[5] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 960)]));
+            conv2d_nchw[6] = (conv2d_nchw[6] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 1152)]));
+            conv2d_nchw[7] = (conv2d_nchw[7] + (pad_temp_shared[((((rc_outer_inner * 126) + (rc_inner * 63)) + (ry_outer_inner * 7)) + (((int)threadIdx.x) % 49))] * kernel_shared[((((((((int)threadIdx.x) / 49) * 48) + (rc_outer_inner * 6)) + (rc_inner * 3)) + ry_outer_inner) + 1344)]));
+          }
+        }
+      }
     }
   }
+  compute[((((int)blockIdx.x) * 1568) + ((int)threadIdx.x))] = max((conv2d_nchw[0] + bias[((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49))]), 0.000000e+00f);
+  compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 196)] = max((conv2d_nchw[1] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 4)]), 0.000000e+00f);
+  compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 392)] = max((conv2d_nchw[2] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 8)]), 0.000000e+00f);
+  compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 588)] = max((conv2d_nchw[3] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 12)]), 0.000000e+00f);
+  compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 784)] = max((conv2d_nchw[4] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 16)]), 0.000000e+00f);
+  compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 980)] = max((conv2d_nchw[5] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 20)]), 0.000000e+00f);
+  compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 1176)] = max((conv2d_nchw[6] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 24)]), 0.000000e+00f);
+  compute[(((((int)blockIdx.x) * 1568) + ((int)threadIdx.x)) + 1372)] = max((conv2d_nchw[7] + bias[(((((int)blockIdx.x) * 32) + (((int)threadIdx.x) / 49)) + 28)]), 0.000000e+00f);
 }
 </pre></div>
 </div>
@@ -1578,7 +777,7 @@ In the example below we resume the status and run 5 more trials.</p>
 Get devices for measurement successfully!
 </pre></div>
 </div>
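Resuming a search like this works by warming up the cost model and search policy from the existing log file before asking for the extra trials. The following is a minimal sketch of that pattern, not the tutorial's exact code; it assumes `task` is the `auto_scheduler.SearchTask` defined earlier in the tutorial and that "conv2d.json" is the log file written by the first tuning run:

    import tvm
    from tvm import auto_scheduler

    log_file = "conv2d.json"  # assumed: the log written by the first tuning run
    cost_model = auto_scheduler.XGBModel()
    cost_model.update_from_file(log_file)  # warm-start the cost model from old records
    search_policy = auto_scheduler.SketchPolicy(
        task,  # assumed: the SearchTask created earlier in the tutorial
        cost_model,
        init_search_callbacks=[auto_scheduler.PreloadMeasuredStates(log_file)],
    )
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=5,  # the 5 extra trials mentioned above
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    task.tune(tune_option, search_policy=search_policy)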
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  29.796 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 5 minutes  39.006 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-conv2d-layer-cuda-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e3e540f3b477c0c52d8eb73e674e8ffd/tune_conv2d_layer_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_conv2d_layer_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
index 73831d893f..79fdf75121 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_cuda.html
@@ -916,7 +916,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-   7.8726       7.8762       7.8767       7.8649       0.0054
+   7.8501       7.8517       7.8542       7.8445       0.0041
 </pre></div>
 </div>
 </div>
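Reading the log file and loading the best schedules, as described above, follows the standard auto-scheduler pattern: wrap Relay compilation in ApplyHistoryBest so the build picks the tuned schedules out of the log. A minimal sketch, assuming `mod`, `params`, and `target` come from the surrounding tutorial and "network.json" is the tuning log produced earlier:

    import tvm
    from tvm import auto_scheduler, relay

    log_file = "network.json"  # assumed: the tuning log produced earlier
    with auto_scheduler.ApplyHistoryBest(log_file):
        # Enable auto-scheduler schedules during Relay compilation.
        with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}
        ):
            lib = relay.build(mod, target=target, params=params)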
@@ -938,7 +938,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a> (a minimal sketch follows this list).</p></li>
 </ol>
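The runner replacement described in the list above amounts to changing one argument of TuningOptions. A minimal sketch, assuming a tracker listening at 127.0.0.1:9190 with devices registered under the hypothetical key "rtx-3080":

    from tvm import auto_scheduler

    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,
        runner=auto_scheduler.RPCRunner(
            "rtx-3080",        # hypothetical device key registered with the tracker
            host="127.0.0.1",  # assumed tracker address
            port=9190,         # assumed tracker port
            repeat=3,
            timeout=50,
        ),
        measure_callbacks=[auto_scheduler.RecordToFile("network.json")],
    )

The same swap applies to the x86 variant of this tutorial further below; only the device key and target differ.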
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  4.667 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  6.225 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-cuda-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/eafe360d52540634c9eea0fa89e804bd/tune_network_cuda.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_cuda.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
index deaad06b86..f53b691c24 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_network_x86.html
@@ -935,7 +935,7 @@ so we can read the log file and load the best schedules.</p>
 Evaluate inference time cost...
 Execution time summary:
  mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)
-  744.6278     743.8504     746.7951     743.2380      1.5527
+  753.1335     753.1278     753.5897     752.6829      0.3702
 </pre></div>
 </div>
 </div>
@@ -957,7 +957,7 @@ to learn how to use the RPC Tracker and RPC Server.
 To use the RPC Tracker in auto-scheduler, replace the runner in <code class="code docutils literal notranslate"><span class="pre">TuningOptions</span></code>
 with <a class="reference internal" href="../../reference/api/python/auto_scheduler.html#tvm.auto_scheduler.RPCRunner" title="tvm.auto_scheduler.RPCRunner"><code class="xref any py py-class docutils literal notranslate"><span class="pre">auto_scheduler.RPCRunner</span></code></a>.</p></li>
 </ol>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  37.055 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  38.823 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autoscheduler-tune-network-x86-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/e416b94ca1090b0897c0f6e0df95b911/tune_network_x86.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">tune_network_x86.py</span></code></a></p>
diff --git a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
index 1b7cfa6094..977194c964 100644
--- a/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
+++ b/docs/how_to/tune_with_autoscheduler/tune_sparse_x86.html
@@ -638,71 +638,22 @@ class Module:
         for i0_outer_i1_outer_fused in T.parallel(32):
             compute_2 = T.allocate([2048], &quot;float32&quot;, &quot;global&quot;)
             compute_3 = T.buffer_decl((2048,), data=compute_2)
-            for i_outer_inner, nb_j_inner in T.grid(4, 2):
-                for i_inner_init in range(16):
-                    cse_var_1: T.int32 = i_outer_inner * 512 + i_inner_init * 32 + nb_j_inner * 16
-                    compute_3[cse_var_1] = T.float32(0)
-                    compute_3[cse_var_1 + 1] = T.float32(0)
-                    compute_3[cse_var_1 + 2] = T.float32(0)
-                    compute_3[cse_var_1 + 3] = T.float32(0)
-                    compute_3[cse_var_1 + 4] = T.float32(0)
-                    compute_3[cse_var_1 + 5] = T.float32(0)
-                    compute_3[cse_var_1 + 6] = T.float32(0)
-                    compute_3[cse_var_1 + 7] = T.float32(0)
-                    compute_3[cse_var_1 + 8] = T.float32(0)
-                    compute_3[cse_var_1 + 9] = T.float32(0)
-                    compute_3[cse_var_1 + 10] = T.float32(0)
-                    compute_3[cse_var_1 + 11] = T.float32(0)
-                    compute_3[cse_var_1 + 12] = T.float32(0)
-                    compute_3[cse_var_1 + 13] = T.float32(0)
-                    compute_3[cse_var_1 + 14] = T.float32(0)
-                    compute_3[cse_var_1 + 15] = T.float32(0)
-                for elem_idx, i_inner in T.grid(T.let(cse_var_2, i0_outer_i1_outer_fused % 16 * 2 + nb_j_inner, placeholder_10[cse_var_2 + 1] - placeholder_10[cse_var_2]), 16):
-                    cse_var_2 = T.var(&quot;int32&quot;)
+            for i_outer_inner in range(16):
+                for i_inner_init, j_init in T.grid(8, 16):
+                    compute_3[i_outer_inner * 128 + i_inner_init * 16 + j_init] = T.float32(0)
+                for elem_idx, i_inner, j in T.grid(placeholder_10[i0_outer_i1_outer_fused + 1] - placeholder_10[i0_outer_i1_outer_fused], 8, 16):
                     placeholder_10 = T.buffer_decl((33,), &quot;int32&quot;, data=placeholder_8.data)
-                    cse_var_21: T.int32 = elem_idx * 16
-                    cse_var_20: T.int32 = i0_outer_i1_outer_fused % 16 * 2 + nb_j_inner
-                    cse_var_19: T.int32 = i_outer_inner * 512 + i_inner * 32 + nb_j_inner * 16
-                    cse_var_18: T.int32 = i0_outer_i1_outer_fused // 16 * 16384 + i_outer_inner * 4096 + i_inner * 256
-                    cse_var_17: T.int32 = cse_var_19 + 9
-                    cse_var_16: T.int32 = cse_var_19 + 8
-                    cse_var_15: T.int32 = cse_var_19 + 7
-                    cse_var_14: T.int32 = cse_var_19 + 6
-                    cse_var_13: T.int32 = cse_var_19 + 5
-                    cse_var_12: T.int32 = cse_var_19 + 4
-                    cse_var_11: T.int32 = cse_var_19 + 3
-                    cse_var_10: T.int32 = cse_var_19 + 2
-                    cse_var_9: T.int32 = cse_var_19 + 15
-                    cse_var_8: T.int32 = cse_var_19 + 14
-                    cse_var_7: T.int32 = cse_var_19 + 13
-                    cse_var_6: T.int32 = cse_var_19 + 12
-                    cse_var_5: T.int32 = cse_var_19 + 11
-                    cse_var_4: T.int32 = cse_var_19 + 10
-                    cse_var_3: T.int32 = cse_var_19 + 1
-                    placeholder_11 = T.buffer_decl((78656,), data=placeholder_6.data)
-                    placeholder_12 = T.buffer_decl((32768,), data=placeholder_5.data)
-                    placeholder_13 = T.buffer_decl((4916,), &quot;int32&quot;, data=placeholder_7.data)
-                    compute_3[cse_var_19] = compute_3[cse_var_19] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_3] = compute_3[cse_var_3] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 1] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_10] = compute_3[cse_var_10] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 2] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_11] = compute_3[cse_var_11] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 3] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_12] = compute_3[cse_var_12] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 4] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_13] = compute_3[cse_var_13] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 5] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_14] = compute_3[cse_var_14] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 6] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_15] = compute_3[cse_var_15] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 7] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_16] = compute_3[cse_var_16] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 8] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_17] = compute_3[cse_var_17] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 9] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_4] = compute_3[cse_var_4] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 10] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_5] = compute_3[cse_var_5] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 11] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_6] = compute_3[cse_var_6] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 12] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_7] = compute_3[cse_var_7] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 13] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_8] = compute_3[cse_var_8] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 14] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-                    compute_3[cse_var_9] = compute_3[cse_var_9] + placeholder_11[placeholder_10[cse_var_20] * 16 + cse_var_21 + 15] * T.max(placeholder_12[cse_var_18 + placeholder_13[placeholder_10[cse_var_20] + elem_idx]], T.float32(0))
-            for i0_inner in range(64):
-                cse_var_22: T.int32 = i0_outer_i1_outer_fused // 16 * 32768 + i0_inner * 512 + i0_outer_i1_outer_fused % 16 * 32
+                    if T.likely(elem_idx &lt; placeholder_10[i0_outer_i1_outer_fused + 1] - placeholder_10[i0_outer_i1_outer_fused]):
+                        placeholder_11 = T.buffer_decl((78656,), data=placeholder_6.data)
+                        placeholder_12 = T.buffer_decl((32768,), data=placeholder_5.data)
+                        placeholder_13 = T.buffer_decl((4916,), &quot;int32&quot;, data=placeholder_7.data)
+                        cse_var_1: T.int32 = i_outer_inner * 128 + i_inner * 16 + j
+                        compute_3[cse_var_1] = compute_3[cse_var_1] + placeholder_11[placeholder_10[i0_outer_i1_outer_fused] * 16 + elem_idx * 16 + j] * T.max(placeholder_12[i_outer_inner * 2048 + i_inner * 256 + placeholder_13[placeholder_10[i0_outer_i1_outer_fused] + elem_idx]], T.float32(0))
+            for i0_inner, i1_inner in T.grid(128, 16):
+                cse_var_2: T.int32 = i0_inner * 512 + i0_outer_i1_outer_fused * 16 + i1_inner
                 compute_4 = T.buffer_decl((65536,), data=compute_1.data)
                 placeholder_10 = T.buffer_decl((65536,), data=placeholder_9.data)
-                compute_4[cse_var_22:cse_var_22 + 32] = T.max(compute_3[i0_inner * 32:i0_inner * 32 + 32] + placeholder_10[cse_var_22:cse_var_22 + 32], T.Broadcast(T.float32(0), 32))
+                compute_4[cse_var_2] = T.max(compute_3[i0_inner * 16 + i1_inner] + placeholder_10[cse_var_2], T.float32(0))
 </pre></div>
 </div>
 </div>
@@ -736,7 +687,7 @@ class Module:
 <span class="p">)</span>
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.716 ms
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Execution time of this operator: 1.537 ms
 </pre></div>
 </div>
 <div class="admonition note">
diff --git a/docs/how_to/tune_with_autotvm/sg_execution_times.html b/docs/how_to/tune_with_autotvm/sg_execution_times.html
index 8969c3368e..9c405b8f95 100644
--- a/docs/how_to/tune_with_autotvm/sg_execution_times.html
+++ b/docs/how_to/tune_with_autotvm/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-tune-with-autotvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:37.938</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
+<p><strong>00:53.823</strong> total execution time for <strong>how_to_tune_with_autotvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -349,22 +349,22 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="tune_conv2d_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-conv2d-cuda-py"><span class="std std-ref">Tuning High Performance Convolution on NVIDIA GPUs</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_conv2d_cuda.py</span></code>)</p></td>
-<td><p>00:37.906</p></td>
+<td><p>00:53.787</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tune_relay_x86.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-x86-py"><span class="std std-ref">Auto-tuning a Convolutional Network for x86 CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_x86.py</span></code>)</p></td>
-<td><p>00:00.018</p></td>
+<td><p>00:00.022</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a Convolutional Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_mobile_gpu.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-mobile-gpu-py"><span class="std std-ref">Auto-tuning a Convolutional Network for Mobile GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_mobile_gpu.py</span></code>)</p></td>
 <td><p>00:00.005</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-even"><td><p><a class="reference internal" href="tune_relay_arm.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a Convolutional Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></td>
-<td><p>00:00.004</p></td>
+<tr class="row-even"><td><p><a class="reference internal" href="tune_relay_cuda.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-cuda-py"><span class="std std-ref">Auto-tuning a Convolutional Network for NVIDIA GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_cuda.py</span></code>)</p></td>
+<td><p>00:00.005</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
-<tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_mobile_gpu.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-mobile-gpu-py"><span class="std std-ref">Auto-tuning a Convolutional Network for Mobile GPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_mobile_gpu.py</span></code>)</p></td>
+<tr class="row-odd"><td><p><a class="reference internal" href="tune_relay_arm.html#sphx-glr-how-to-tune-with-autotvm-tune-relay-arm-py"><span class="std std-ref">Auto-tuning a Convolutional Network for ARM CPU</span></a> (<code class="docutils literal notranslate"><span class="pre">tune_relay_arm.py</span></code>)</p></td>
 <td><p>00:00.004</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
diff --git a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
index 9c4579de90..96f790c551 100644
--- a/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
+++ b/docs/how_to/tune_with_autotvm/tune_conv2d_cuda.html
@@ -568,8 +568,7 @@ for this template</p>
 waiting for device...
 device available
 Get devices for measurement successfully!
-No: 1   GFLOPS: 111.35/111.35   result: MeasureResult(costs=(0.0020790124897959185,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.7945833206176758, timestamp=1674022602.5026455)      [(&#39;tile_f&#39;, [-1, 1, 32, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,979530
-No: 2   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+No: 1   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -691,8 +690,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 128, 1]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 4]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,8198569
-No: 3   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 64, 1, 2]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 512]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7741861
+No: 2   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -814,272 +813,162 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 16, 2]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 256, 2]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2580687
-No: 4   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
-    func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
-    func = build(s, args, target_host=task.target_host, runtime=runtime)
-  File &quot;/workspace/python/tvm/driver/build_module.py&quot;, line 227, in build
-    input_mod = lower(inputs, args, name=name, binds=binds)
-  File &quot;/workspace/python/tvm/driver/build_module.py&quot;, line 134, in lower
-    return ffi.lower_schedule(inp, args, name, binds, simple_mode)
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 128, 2, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 128, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9935877
+No: 3   GFLOPS: 0.00/0.00       result: Traceback (most recent call last):
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 742, in __call__
+    yield remote, remote.load_module(os.path.split(build_result.filename)[1])
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 706, in run_through_rpc
+    costs = time_f(*args).results
+  File &quot;/workspace/python/tvm/runtime/module.py&quot;, line 357, in evaluator
+    blob = feval(*args)
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 331, in tvm._ffi._cy3.core.PackedFuncBase.__call__
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 276, in tvm._ffi._cy3.core.FuncCall
+  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 262, in tvm._ffi._cy3.core.FuncCall
+  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 251, in tvm._ffi._cy3.core.FuncCall3
   File &quot;tvm/_ffi/_cython/./base.pxi&quot;, line 181, in tvm._ffi._cy3.core.CHECK_CALL
 tvm._ffi.base.TVMError: Traceback (most recent call last):
-  24: TVMFuncCall
+  4: TVMFuncCall
         at ../src/runtime/c_runtime_api.cc:477
-  23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  22: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  21: operator()
-        at ../include/tvm/runtime/packed_func.h:1730
-  20: unpack_call&lt;tvm::IRModule, 5, tvm::&lt;lambda(tvm::te::Schedule, const tvm::runtime::Array&lt;tvm::runtime::ObjectRef&gt;&amp;, const tvm::runtime::String&amp;, const tvm::runtime::Map&lt;tvm::te::Tensor, tvm::tir::Buffer&gt;&amp;, bool)&gt; &gt;
-        at ../include/tvm/runtime/packed_func.h:1670
-  19: run&lt;&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  18: run&lt;tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  17: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  16: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  15: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  14: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1645
-  13: operator()
-        at ../src/driver/driver_api.cc:395
-  12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array&lt;tvm::runtime::ObjectRef, void&gt; const&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, std::unordered_map&lt;tvm::te::Tensor, tvm::tir::Buffer, std::hash&lt;tvm::te::Tensor&gt;, std::equal_to&lt;tvm::te::Tensor&gt;, std::allocator&lt;std::pair&lt;tvm::te::Tensor const, tvm::tir::Buffer&gt; &gt; &gt; const&amp;, tvm::GlobalVarSupply, bool)
-        at ../src/driver/driver_api.cc:381
-  11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array&lt;tvm::transform::Pass, void&gt;)
-        at ../src/driver/driver_api.cc:276
-  10: tvm::transform::Pass::operator()(tvm::IRModule) const
-        at ../src/ir/transform.cc:258
-  9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:451
-  7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/tir/ir/transform.cc:100
-  5: tvm::runtime::TypedPackedFunc&lt;tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)&gt;::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-        at ../include/tvm/runtime/packed_func.h:1749
-  4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher&lt;tvm::tir::PrimFunc&gt;::run&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::runtime::PackedFunc const&amp;, tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;)
-        at ../include/tvm/runtime/packed_func.h:1693
-  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;) const
-        at ../include/tvm/runtime/packed_func.h:1617
-  2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  1: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  0: operator()
-        at ../src/runtime/c_runtime_api.cc:534
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
-    raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
+  3: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+        at ../include/tvm/runtime/packed_func.h:1217
+  2: tvm::runtime::RPCWrappedFunc::operator()(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
+        at ../src/runtime/rpc/rpc_module.cc:129
+  1: tvm::runtime::RPCClientSession::CallFunc(void*, TVMValue const*, int const*, int, std::function&lt;void (tvm::runtime::TVMArgs)&gt; const&amp;)
+        at ../src/runtime/rpc/rpc_endpoint.cc:1012
+  0: tvm::runtime::RPCEndpoint::CallFunc(void*, TVMValue const*, int const*, int, std::function&lt;void (tvm::runtime::TVMArgs)&gt;)
+        at ../src/runtime/rpc/rpc_endpoint.cc:804
+  File &quot;../src/runtime/rpc/rpc_endpoint.cc&quot;, line 804
+TVMError:
+---------------------------------------------------------------
+An error occurred during the execution of TVM.
+For more information, please see: https://tvm.apache.org/docs/errors.html
+---------------------------------------------------------------
+  Check failed: (code == RPCCode::kReturn) is false: code=kShutdown
+
+During handling of the above exception, another exception occurred:
 
 Traceback (most recent call last):
-  24: TVMFuncCall
-        at ../src/runtime/c_runtime_api.cc:477
-  23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  22: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  21: operator()
-        at ../include/tvm/runtime/packed_func.h:1730
-  20: unpack_call&lt;tvm::IRModule, 5, tvm::&lt;lambda(tvm::te::Schedule, const tvm::runtime::Array&lt;tvm::runtime::ObjectRef&gt;&amp;, const tvm::runtime::String&amp;, const tvm::runtime::Map&lt;tvm::te::Tensor, tvm::tir::Buffer&gt;&amp;, bool)&gt; &gt;
-        at ../include/tvm/runtime/packed_func.h:1670
-  19: run&lt;&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  18: run&lt;tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  17: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  16: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  15: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  14: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1645
-  13: operator()
-        at ../src/driver/driver_api.cc:395
-  12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array&lt;tvm::runtime::ObjectRef, void&gt; const&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, std::unordered_map&lt;tvm::te::Tensor, tvm::tir::Buffer, std::hash&lt;tvm::te::Tensor&gt;, std::equal_to&lt;tvm::te::Tensor&gt;, std::allocator&lt;std::pair&lt;tvm::te::Tensor const, tvm::tir::Buffer&gt; &gt; &gt; const&amp;, tvm::GlobalVarSupply, bool)
-        at ../src/driver/driver_api.cc:381
-  11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array&lt;tvm::transform::Pass, void&gt;)
-        at ../src/driver/driver_api.cc:276
-  10: tvm::transform::Pass::operator()(tvm::IRModule) const
-        at ../src/ir/transform.cc:258
-  9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:451
-  7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/tir/ir/transform.cc:100
-  5: tvm::runtime::TypedPackedFunc&lt;tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)&gt;::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-        at ../include/tvm/runtime/packed_func.h:1749
-  4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher&lt;tvm::tir::PrimFunc&gt;::run&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::runtime::PackedFunc const&amp;, tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;)
-        at ../include/tvm/runtime/packed_func.h:1693
-  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;) const
-        at ../include/tvm/runtime/packed_func.h:1617
-  2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  1: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  0: operator()
-        at ../src/runtime/c_runtime_api.cc:534
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
-    raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 16, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 2, 128]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,10439569
-No: 5   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 142, in build
-    res = future.result()
-  File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 435, in result
-    return self.__get_result()
-  File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 384, in __get_result
-    raise self._exception
-  File &quot;/usr/lib/python3.7/concurrent/futures/thread.py&quot;, line 57, in run
-    result = self.fn(*self.args, **self.kwargs)
-  File &quot;/workspace/python/tvm/contrib/popen_pool.py&quot;, line 432, in &lt;lambda&gt;
-    worker = lambda *args: self._worker_run(*args)
-  File &quot;/workspace/python/tvm/contrib/popen_pool.py&quot;, line 401, in _worker_run
-    return proc.recv()
-  File &quot;/workspace/python/tvm/contrib/popen_pool.py&quot;, line 309, in recv
-    raise TimeoutError()
-TimeoutError
-
-        [(&#39;tile_f&#39;, [-1, 8, 2, 16]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2949493
-No: 6   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
-    func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
-    func = build(s, args, target_host=task.target_host, runtime=runtime)
-  File &quot;/workspace/python/tvm/driver/build_module.py&quot;, line 227, in build
-    input_mod = lower(inputs, args, name=name, binds=binds)
-  File &quot;/workspace/python/tvm/driver/build_module.py&quot;, line 134, in lower
-    return ffi.lower_schedule(inp, args, name, binds, simple_mode)
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 331, in tvm._ffi._cy3.core.PackedFuncBase.__call__
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 276, in tvm._ffi._cy3.core.FuncCall
-  File &quot;tvm/_ffi/_cython/./base.pxi&quot;, line 181, in tvm._ffi._cy3.core.CHECK_CALL
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 706, in run_through_rpc
+    costs = time_f(*args).results
+  File &quot;/usr/lib/python3.7/contextlib.py&quot;, line 130, in __exit__
+    self.gen.throw(type, value, traceback)
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 746, in __call__
+    remote.remove(build_result.filename)
+  File &quot;/workspace/python/tvm/rpc/client.py&quot;, line 144, in remove
+    self._remote_funcs[&quot;remove&quot;] = self.get_function(&quot;tvm.rpc.server.remove&quot;)
+  File &quot;/workspace/python/tvm/rpc/client.py&quot;, line 72, in get_function
+    return self._sess.get_function(name)
+  File &quot;/workspace/python/tvm/runtime/module.py&quot;, line 171, in get_function
+    self.handle, c_str(name), ctypes.c_int(query_imports), ctypes.byref(ret_handle)
+  File &quot;/workspace/python/tvm/_ffi/base.py&quot;, line 348, in check_call
+    raise get_last_ffi_error()
 tvm._ffi.base.TVMError: Traceback (most recent call last):
-  24: TVMFuncCall
-        at ../src/runtime/c_runtime_api.cc:477
-  23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  22: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  21: operator()
-        at ../include/tvm/runtime/packed_func.h:1730
-  20: unpack_call&lt;tvm::IRModule, 5, tvm::&lt;lambda(tvm::te::Schedule, const tvm::runtime::Array&lt;tvm::runtime::ObjectRef&gt;&amp;, const tvm::runtime::String&amp;, const tvm::runtime::Map&lt;tvm::te::Tensor, tvm::tir::Buffer&gt;&amp;, bool)&gt; &gt;
-        at ../include/tvm/runtime/packed_func.h:1670
-  19: run&lt;&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  18: run&lt;tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  17: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  16: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  15: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  14: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1645
-  13: operator()
-        at ../src/driver/driver_api.cc:395
-  12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array&lt;tvm::runtime::ObjectRef, void&gt; const&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, std::unordered_map&lt;tvm::te::Tensor, tvm::tir::Buffer, std::hash&lt;tvm::te::Tensor&gt;, std::equal_to&lt;tvm::te::Tensor&gt;, std::allocator&lt;std::pair&lt;tvm::te::Tensor const, tvm::tir::Buffer&gt; &gt; &gt; const&amp;, tvm::GlobalVarSupply, bool)
-        at ../src/driver/driver_api.cc:381
-  11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array&lt;tvm::transform::Pass, void&gt;)
-        at ../src/driver/driver_api.cc:276
-  10: tvm::transform::Pass::operator()(tvm::IRModule) const
-        at ../src/ir/transform.cc:258
-  9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:451
-  7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/tir/ir/transform.cc:100
-  5: tvm::runtime::TypedPackedFunc&lt;tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)&gt;::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-        at ../include/tvm/runtime/packed_func.h:1749
-  4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher&lt;tvm::tir::PrimFunc&gt;::run&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::runtime::PackedFunc const&amp;, tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;)
-        at ../include/tvm/runtime/packed_func.h:1693
-  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;) const
+  52: 0xffffffffffffffff
+  51: _start
+  50: __libc_start_main
+  49: _Py_UnixMain
+  48: 0x0000000000650da0
+  47: 0x0000000000650afa
+  46: _PyFunction_FastCallDict
+  45: _PyEval_EvalCodeWithName
+  44: _PyEval_EvalFrameDefault
+  43: _PyFunction_FastCallKeywords
+  42: _PyEval_EvalCodeWithName
+  41: _PyEval_EvalFrameDefault
+  40: _PyMethodDef_RawFastCallKeywords
+  39: 0x0000000000546369
+  38: _PyEval_EvalCodeWithName
+  37: _PyEval_EvalFrameDefault
+  36: _PyFunction_FastCallKeywords
+  35: _PyEval_EvalCodeWithName
+  34: _PyEval_EvalFrameDefault
+  33: _PyFunction_FastCallDict
+  32: _PyEval_EvalCodeWithName
+  31: _PyEval_EvalFrameDefault
+  30: _PyObject_FastCallDict
+  29: 0x00000000004c06e1
+  28: _PyFunction_FastCallDict
+  27: _PyEval_EvalFrameDefault
+  26: _PyMethodDescr_FastCallKeywords
+  25: 0x00000000005dcb58
+  24: 0x00000000005dc83f
+  23: 0x00000000004ba127
+  22: _PyEval_EvalFrameDefault
+  21: _PyFunction_FastCallKeywords
+  20: _PyEval_EvalFrameDefault
+  19: _PyFunction_FastCallKeywords
+  18: _PyEval_EvalFrameDefault
+  17: _PyFunction_FastCallKeywords
+  16: _PyEval_EvalCodeWithName
+  15: _PyEval_EvalFrameDefault
+  14: 0x0000000000537c30
+  13: _PyObject_FastCallKeywords
+  12: 0x00007f713981bfa2
+  11: _ctypes_callproc
+  10: ffi_call
+  9: ffi_call_unix64
+  8: TVMModGetFunction
+        at ../src/runtime/c_runtime_api.cc:408
+  7: tvm::runtime::ModuleNode::GetFunction(std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, bool)
+        at ../src/runtime/module.cc:66
+  6: tvm::runtime::RPCModuleNode::GetFunction(std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, tvm::runtime::ObjectPtr&lt;tvm::runtime::Object&gt; const&amp;)
+        at ../src/runtime/rpc/rpc_module.cc:185
+  5: tvm::runtime::RPCClientSession::GetFunction(std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;)
+        at ../src/runtime/rpc/rpc_endpoint.cc:1007
+  4: tvm::runtime::TVMRetValue tvm::runtime::RPCEndpoint::SysCallRemote&lt;std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;&gt;(tvm::runtime::RPCCode, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;)
+        at ../src/runtime/rpc/rpc_endpoint.h:223
+  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;int, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;&gt;(int&amp;&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;) const
         at ../include/tvm/runtime/packed_func.h:1617
   2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
         at ../include/tvm/runtime/packed_func.h:1217
   1: Call
         at ../include/tvm/runtime/packed_func.h:1213
   0: operator()
-        at ../src/runtime/c_runtime_api.cc:534
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
-    raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel
+        at ../src/runtime/rpc/rpc_endpoint.cc:684
+  File &quot;../src/runtime/rpc/rpc_endpoint.cc&quot;, line 684
+TVMError:
+---------------------------------------------------------------
+An error occurred during the execution of TVM.
+For more information, please see: https://tvm.apache.org/docs/errors.html
+---------------------------------------------------------------
+  Check failed: (code == RPCCode::kReturn) is false: code=1
 
 Traceback (most recent call last):
-  24: TVMFuncCall
-        at ../src/runtime/c_runtime_api.cc:477
-  23: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  22: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  21: operator()
-        at ../include/tvm/runtime/packed_func.h:1730
-  20: unpack_call&lt;tvm::IRModule, 5, tvm::&lt;lambda(tvm::te::Schedule, const tvm::runtime::Array&lt;tvm::runtime::ObjectRef&gt;&amp;, const tvm::runtime::String&amp;, const tvm::runtime::Map&lt;tvm::te::Tensor, tvm::tir::Buffer&gt;&amp;, bool)&gt; &gt;
-        at ../include/tvm/runtime/packed_func.h:1670
-  19: run&lt;&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  18: run&lt;tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  17: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  16: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  15: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1630
-  14: run&lt;tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_, tvm::runtime::TVMMovableArgValueWithContext_&gt;
-        at ../include/tvm/runtime/packed_func.h:1645
-  13: operator()
-        at ../src/driver/driver_api.cc:395
-  12: tvm::LowerSchedule(tvm::te::Schedule, tvm::runtime::Array&lt;tvm::runtime::ObjectRef, void&gt; const&amp;, std::__cxx11::basic_string&lt;char, std::char_traits&lt;char&gt;, std::allocator&lt;char&gt; &gt; const&amp;, std::unordered_map&lt;tvm::te::Tensor, tvm::tir::Buffer, std::hash&lt;tvm::te::Tensor&gt;, std::equal_to&lt;tvm::te::Tensor&gt;, std::allocator&lt;std::pair&lt;tvm::te::Tensor const, tvm::tir::Buffer&gt; &gt; &gt; const&amp;, tvm::GlobalVarSupply, bool)
-        at ../src/driver/driver_api.cc:381
-  11: tvm::LowerWithPassList(tvm::IRModule, tvm::runtime::Array&lt;tvm::transform::Pass, void&gt;)
-        at ../src/driver/driver_api.cc:276
-  10: tvm::transform::Pass::operator()(tvm::IRModule) const
-        at ../src/ir/transform.cc:258
-  9: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  8: tvm::transform::SequentialNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:451
-  7: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/ir/transform.cc:274
-  6: tvm::tir::transform::PrimFuncPassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&amp;) const
-        at ../src/tir/ir/transform.cc:100
-  5: tvm::runtime::TypedPackedFunc&lt;tvm::tir::PrimFunc (tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext)&gt;::operator()(tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext) const
-        at ../include/tvm/runtime/packed_func.h:1749
-  4: tvm::tir::PrimFunc tvm::runtime::detail::typed_packed_call_dispatcher&lt;tvm::tir::PrimFunc&gt;::run&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::runtime::PackedFunc const&amp;, tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;)
-        at ../include/tvm/runtime/packed_func.h:1693
-  3: tvm::runtime::TVMRetValue tvm::runtime::PackedFunc::operator()&lt;tvm::tir::PrimFunc, tvm::IRModule, tvm::transform::PassContext&gt;(tvm::tir::PrimFunc&amp;&amp;, tvm::IRModule&amp;&amp;, tvm::transform::PassContext&amp;&amp;) const
-        at ../include/tvm/runtime/packed_func.h:1617
-  2: tvm::runtime::PackedFuncObj::CallPacked(tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*) const
-        at ../include/tvm/runtime/packed_func.h:1217
-  1: Call
-        at ../include/tvm/runtime/packed_func.h:1213
-  0: operator()
-        at ../src/runtime/c_runtime_api.cc:534
-  File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
-  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
-    raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 2, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 256, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2547744
-No: 7   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+  52: 0xffffffffffffffff
+  51: _start
+  50: __libc_start_main
+  49: _Py_UnixMain
+  48: 0x0000000000650da0
+  47: 0x0000000000650afa
+  46: _PyFunction_FastCallDict
+  45: _PyEval_EvalCodeWithName
+  44: _PyEval_EvalFrameDefault
+  43: _PyFunction_FastCallKeywords
+  42: _PyEval_EvalCodeWithName
+  41: _PyEval_EvalFrameDefault
+  40: _PyMethodDef_RawFastCallKeywords
+  39: 0x0000000000546369
+  38: _PyEval_EvalCodeWithName
+  37: _PyEval_EvalFrameDefault
+  36: _PyFunction_FastCallKeywords
+  35: _PyEval_EvalCodeWithName
+  34: _PyEval_EvalFrameDefault
+  33: _PyFunction_FastCallDict
+  32: _PyEval_EvalCodeWithName
+  31: _PyEval_EvalFrameDefault
+  30: _PyObject_FastCallDict
+  29: 0x00000000004c06e1
+  28: _PyFunction_FastCallDict
+  27: _PyEval_EvalFrameDefault
+  26: _PyMethodDescr_FastCallKeywords
+  25: 0x00000000005dcb58
+  24: 0x00000000005dc83f
+  23: 0x00000000004ba127
+  22: _PyEval_EvalFrameDefault
+  21: _PyFunction_FastCallKeywords
+  20: _PyEval_EvalFrameDefault
+  19: _PyFunction_FastCall      [(&#39;tile_f&#39;, [-1, 16, 1, 2]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 2]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2359559
+No: 4   GFLOPS: 115.75/115.75   result: MeasureResult(costs=(0.00200001922,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.2005980014801025, timestamp=1674053321.1974628)      [(&#39;tile_f&#39;, [-1, 2, 2, 1]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7233831
+No: 5   GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1201,8 +1090,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 256, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 1, 512]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,8904508
-No: 8   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 2, 8]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 8, 8]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4367806
+No: 6   GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1324,8 +1213,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 64, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 128, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6608963
-No: 9   GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 16, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 256, 2]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9552559
+No: 7   GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1447,8 +1336,9 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 4, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 64]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9258557
-No: 10  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 128]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 512]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,190511
+No: 8   GFLOPS: 1.48/115.75     result: MeasureResult(costs=(0.15625212075,), error_no=MeasureErrorNo.NO_ERROR, all_cost=6.140629768371582, timestamp=1674053328.5444367)       [(&#39;tile_f&#39;, [-1, 64, 1, 8]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 1]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7938182
+No: 9   GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1570,8 +1460,26 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 4, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 64, 8]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9603335
-No: 11  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 1, 512]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 2, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3824919
+No: 10  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
+  File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 142, in build
+    res = future.result()
+  File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 435, in result
+    return self.__get_result()
+  File &quot;/usr/lib/python3.7/concurrent/futures/_base.py&quot;, line 384, in __get_result
+    raise self._exception
+  File &quot;/usr/lib/python3.7/concurrent/futures/thread.py&quot;, line 57, in run
+    result = self.fn(*self.args, **self.kwargs)
+  File &quot;/workspace/python/tvm/contrib/popen_pool.py&quot;, line 432, in &lt;lambda&gt;
+    worker = lambda *args: self._worker_run(*args)
+  File &quot;/workspace/python/tvm/contrib/popen_pool.py&quot;, line 401, in _worker_run
+    return proc.recv()
+  File &quot;/workspace/python/tvm/contrib/popen_pool.py&quot;, line 309, in recv
+    raise TimeoutError()
+TimeoutError
+
+        [(&#39;tile_f&#39;, [-1, 8, 4, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 1, 32]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9823242
+No: 11  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1693,8 +1601,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 16, 4]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2587768
-No: 12  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 16, 2, 8]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 2]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2754107
+No: 12  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1816,8 +1724,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 4, 32]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 16, 32]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 0)],None,736315
-No: 13  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 1, 32]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 2, 64]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5779367
+No: 13  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -1939,8 +1847,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 2, 1]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 8, 64]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,10044555
-No: 14  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 4, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 7]), (&#39;tile_rc&#39;, [-1, 16, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7827752
+No: 14  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2062,8 +1970,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 1, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 8, 32]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6348485
-No: 15  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 1, 2, 8]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 16, 32]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,5578243
+No: 15  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2185,8 +2093,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 4, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 4]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7624397
-No: 16  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 2, 4]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 16]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3809631
+No: 16  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2308,8 +2216,9 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 2, 64]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 4, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,4848126
-No: 17  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 8, 1, 32]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 64]), (&#39;tile_ry&#39;, [-1, 1, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,1913088
+No: 17  GFLOPS: 40.72/115.75    result: MeasureResult(costs=(0.005685248041666667,), error_no=MeasureErrorNo.NO_ERROR, all_cost=4.482030391693115, timestamp=1674053344.7685432)        [(&#39;tile_f&#39;, [-1, 8, 1, 2]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2588578
+No: 18  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2431,8 +2340,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 1, 16]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 512, 1]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7195926
-No: 18  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 2, 4, 16]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 1, 128]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9659496
+No: 19  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2554,8 +2463,8 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 4, 8, 2]), (&#39;tile_y&#39;, [-1, 1, 7, 1]), (&#39;tile_x&#39;, [-1, 7, 1, 1]), (&#39;tile_rc&#39;, [-1, 16, 8]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 0)],None,2240121
-No: 19  GFLOPS: 0.00/111.35     result: Traceback (most recent call last):
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 256, 1, 1]), (&#39;tile_y&#39;, [-1, 1, 1, 7]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 64, 8]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 0)],None,3795228
+No: 20  GFLOPS: 0.00/115.75     result: Traceback (most recent call last):
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 592, in __call__
     func, arg_info = _build_func_common(measure_input, self.runtime, **kwargs)
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 544, in _build_func_common
@@ -2677,8 +2586,7 @@ Traceback (most recent call last):
   File &quot;tvm/_ffi/_cython/./packed_func.pxi&quot;, line 56, in tvm._ffi._cy3.core.tvm_callback
   File &quot;/workspace/python/tvm/autotvm/measure/measure_methods.py&quot;, line 875, in verify_pass
     raise InstantiationError(&quot;Skipped because of invalid gpu kernel&quot;)
-tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 32, 4, 4]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 8, 64]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 1500), (&#39;unroll_explicit&#39;, 1)],None,9268500
-No: 20  GFLOPS: 234.57/234.57   result: MeasureResult(costs=(0.0009869387213114753,), error_no=MeasureErrorNo.NO_ERROR, all_cost=4.453100681304932, timestamp=1674022621.5645502)       [(&#39;tile_f&#39;, [-1, 1, 4, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 8, 8]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7851432
+tvm.autotvm.task.space.InstantiationError: Skipped because of invalid gpu kernel        [(&#39;tile_f&#39;, [-1, 256, 1, 1]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 16, 4]), (&#39;tile_ry&#39;, [-1, 1, 3]), (&#39;tile_rx&#39;, [-1, 1, 3]), (&#39;auto_unroll_max_step&#39;, 0), (&#39;unroll_explicit&#39;, 1)],None,6857188
 </pre></div>
 </div>
 <p>Finally we can inspect the best config from the log file, check correctness,
@@ -2717,9 +2625,9 @@ and measure running time.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Finish loading 20 records
 
 Best config:
-[(&#39;tile_f&#39;, [-1, 1, 4, 2]), (&#39;tile_y&#39;, [-1, 1, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 7, 1]), (&#39;tile_rc&#39;, [-1, 8, 8]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 3, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7851432
+[(&#39;tile_f&#39;, [-1, 2, 2, 1]), (&#39;tile_y&#39;, [-1, 7, 1, 1]), (&#39;tile_x&#39;, [-1, 1, 1, 1]), (&#39;tile_rc&#39;, [-1, 2, 4]), (&#39;tile_ry&#39;, [-1, 3, 1]), (&#39;tile_rx&#39;, [-1, 1, 1]), (&#39;auto_unroll_max_step&#39;, 512), (&#39;unroll_explicit&#39;, 1)],None,7233831
 Finish loading 20 records
-Time cost of this operator: 0.001165
+Time cost of this operator: 0.002476
 </pre></div>
 </div>
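[Editor's note] For readers replaying this step, the inspection above is driven entirely by the autotvm log. A minimal sketch, assuming the log file name and the conv2d_no_batching template function from this tutorial (both are assumptions here, since the diff only shows the rendered output):

    import tvm
    from tvm import autotvm

    # Keep only the best record out of the 20 measured configs (file names assumed).
    autotvm.record.pick_best("conv2d.log", "conv2d_best.log")

    # Compile with the best config applied, then the kernel can be timed.
    with autotvm.apply_history_best("conv2d_best.log"):
        with tvm.target.Target("cuda"):
            s, arg_bufs = conv2d_no_batching(N, H, W, CO, CI, KH, KW, strides, padding)
            func = tvm.build(s, arg_bufs)

This mirrors the "Best config" and "Time cost of this operator" lines printed above.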
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-tune-with-autotvm-tune-conv2d-cuda-py">
diff --git a/docs/how_to/work_with_microtvm/micro_autotune.html b/docs/how_to/work_with_microtvm/micro_autotune.html
index be321e7fd7..e8a46f5807 100644
--- a/docs/how_to/work_with_microtvm/micro_autotune.html
+++ b/docs/how_to/work_with_microtvm/micro_autotune.html
@@ -646,10 +646,10 @@ the tuned operator.</p>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>########## Build without Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  308.2     98.702   (1, 2, 10, 10, 3)  2       1        [308.2]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.09      0.99     (1, 6, 10, 10)     1       1        [3.09]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.965     0.309    (1, 1, 10, 10, 3)  1       1        [0.965]
-Total_time                                    -                                             312.254   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  310.8     98.702   (1, 2, 10, 10, 3)  2       1        [310.8]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       3.088     0.981    (1, 6, 10, 10)     1       1        [3.088]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.998     0.317    (1, 1, 10, 10, 3)  1       1        [0.998]
+Total_time                                    -                                             314.886   -        -                  -       -        -
 </pre></div>
 </div>
 </div>
@@ -701,10 +701,10 @@ Total_time                                    -
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>########## Build with Autotuning ##########
 Node Name                                     Ops                                           Time(us)  Time(%)  Shape              Inputs  Outputs  Measurements(us)
 ---------                                     ---                                           --------  -------  -----              ------  -------  ----------------
-tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  100.1     97.36    (1, 6, 10, 10, 1)  2       1        [100.1]
-tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.75      1.703    (1, 6, 10, 10)     1       1        [1.75]
-tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.964     0.938    (1, 1, 10, 10, 3)  1       1        [0.964]
-Total_time                                    -                                             102.814   -        -                  -       -        -
+tvmgen_default_fused_nn_contrib_conv2d_NCHWc  tvmgen_default_fused_nn_contrib_conv2d_NCHWc  100.2     97.306   (1, 6, 10, 10, 1)  2       1        [100.2]
+tvmgen_default_fused_layout_transform_1       tvmgen_default_fused_layout_transform_1       1.779     1.727    (1, 6, 10, 10)     1       1        [1.779]
+tvmgen_default_fused_layout_transform         tvmgen_default_fused_layout_transform         0.996     0.967    (1, 1, 10, 10, 3)  1       1        [0.996]
+Total_time                                    -                                             102.975   -        -                  -       -        -
 </pre></div>
 </div>
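[Editor's note] The two tables above show the per-operator breakdown before and after autotuning. As a host-side analogue (an assumption: the microTVM tutorial itself collects these numbers through a microTVM session, not this API), TVM's debug executor prints the same kind of per-node table:

    from tvm.contrib.debugger import debug_executor

    # graph_json, lib, dev and params come from an ordinary relay.build(...) flow.
    m = debug_executor.create(graph_json, lib, dev)
    m.set_input(**params)
    print(m.profile())  # per-op Time(us)/Time(%) breakdown, as in the tables above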
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-autotune-py">
diff --git a/docs/how_to/work_with_microtvm/micro_pytorch.html b/docs/how_to/work_with_microtvm/micro_pytorch.html
index 87b57cf915..3fbc9fff4c 100644
--- a/docs/how_to/work_with_microtvm/micro_pytorch.html
+++ b/docs/how_to/work_with_microtvm/micro_pytorch.html
@@ -453,7 +453,7 @@ download a cat image and preprocess it to use as the model input.</p>
 Downloading: &quot;https://download.pytorch.org/models/quantized/mobilenet_v2_qnnpack_37f702c5.pth&quot; to /workspace/.cache/torch/hub/checkpoints/mobilenet_v2_qnnpack_37f702c5.pth
 
   0%|          | 0.00/3.42M [00:00&lt;?, ?B/s]
-100%|##########| 3.42M/3.42M [00:00&lt;00:00, 71.2MB/s]
+100%|##########| 3.42M/3.42M [00:00&lt;00:00, 108MB/s]
 /workspace/python/tvm/relay/frontend/pytorch_utils.py:47: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
   return LooseVersion(torch_ver) &gt; ver
 /venv/apache-tvm-py3.7/lib/python3.7/site-packages/setuptools/_distutils/version.py:346: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
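[Editor's note] The download-and-preprocess step referenced in this hunk is standard torchvision plumbing. A sketch of the idea; the image URL and exact transform parameters are assumptions, not copied from the tutorial:

    from PIL import Image
    from torchvision import transforms
    from tvm.contrib.download import download_testdata

    img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
    img_path = download_testdata(img_url, "cat.png", module="data")

    # Resize, crop and normalize to the 224x224 input quantized MobileNetV2 expects.
    transform = transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ]
    )
    input_data = transform(Image.open(img_path)).unsqueeze(0).numpy()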
@@ -577,7 +577,7 @@ via the host <cite>main.cc</cite> or if a Zephyr emulated board is selected as
 Torch top-1 id: 282, class name: tiger cat
 </pre></div>
 </div>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  7.816 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 1 minutes  10.386 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-pytorch-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/12b9ecc04c41abaa12022061771821d1/micro_pytorch.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_pytorch.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/micro_train.html b/docs/how_to/work_with_microtvm/micro_train.html
index 6de6b75d46..67ca9b5313 100644
--- a/docs/how_to/work_with_microtvm/micro_train.html
+++ b/docs/how_to/work_with_microtvm/micro_train.html
@@ -523,7 +523,7 @@ take about <strong>2 minutes</strong> to download the Stanford Cars, while COCO
 <a href="https://docs.python.org/3/library/shutil.html#shutil.move" title="shutil.move" class="sphx-glr-backref-module-shutil sphx-glr-backref-type-py-function"><span class="n">shutil</span><span class="o">.</span><span class="n">move</span></a><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><a href="https://docs.python.org/3/library/stdtypes.html#str" title="builtins.str" class="sphx-glr-backref-module-builtins sphx-glr-backref-typ [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmpdfofimzq/images/random&#39;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&#39;/tmp/tmp1dvacfup/images/random&#39;
 </pre></div>
 </div>
 </div>
@@ -583,8 +583,8 @@ objects to other stuff? We can display some examples from our datasets using <co
     <span class="n">plt</span><span class="o">.</span><span class="n">axis</span><span class="p">(</span><span class="s2">&quot;off&quot;</span><span class="p">)</span>
 </pre></div>
 </div>
-<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmpdfofimzq/images/target contains 8144 images
-/tmp/tmpdfofimzq/images/random contains 5000 images
+<img src="../../_images/sphx_glr_micro_train_001.png" srcset="../../_images/sphx_glr_micro_train_001.png" alt="[0.0, 1.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [0.0, 1.0]" class = "sphx-glr-single-img"/><div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>/tmp/tmp1dvacfup/images/target contains 8144 images
+/tmp/tmp1dvacfup/images/random contains 5000 images
 </pre></div>
 </div>
 </div>
@@ -696,13 +696,13 @@ the time on our validation set).</p>
 </pre></div>
 </div>
 <div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>Epoch 1/3
-328/328 - 47s - loss: 0.2342 - accuracy: 0.9197 - val_loss: 0.1264 - val_accuracy: 0.9532 - 47s/epoch - 143ms/step
+328/328 - 47s - loss: 0.2087 - accuracy: 0.9283 - val_loss: 0.2082 - val_accuracy: 0.9335 - 47s/epoch - 144ms/step
 Epoch 2/3
-328/328 - 43s - loss: 0.1031 - accuracy: 0.9623 - val_loss: 0.1160 - val_accuracy: 0.9532 - 43s/epoch - 131ms/step
+328/328 - 44s - loss: 0.0912 - accuracy: 0.9667 - val_loss: 0.1188 - val_accuracy: 0.9585 - 44s/epoch - 133ms/step
 Epoch 3/3
-328/328 - 43s - loss: 0.0653 - accuracy: 0.9745 - val_loss: 0.1150 - val_accuracy: 0.9600 - 43s/epoch - 131ms/step
+328/328 - 43s - loss: 0.0671 - accuracy: 0.9753 - val_loss: 0.1070 - val_accuracy: 0.9645 - 43s/epoch - 132ms/step
 
-&lt;keras.callbacks.History object at 0x7f2087634f90&gt;
+&lt;keras.callbacks.History object at 0x7f3059d59450&gt;
 </pre></div>
 </div>
 </div>
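[Editor's note] The epoch log above is ordinary keras Model.fit output; roughly the call that produces it (dataset names and epoch count are assumptions in the tutorial's spirit):

    # verbose=2 prints one summary line per epoch, matching the log above.
    history = model.fit(
        train_dataset,
        validation_data=validation_dataset,
        epochs=3,
        verbose=2,
    )
    print(history.history["val_accuracy"])  # e.g. the 0.9645 reported for epoch 3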
@@ -962,7 +962,7 @@ as intended.</p>
 <p>From here, we could modify the model to read live images from the camera - we have another
 Arduino tutorial for how to do that <a class="reference external" href="https://github.com/guberti/tvm-arduino-demos/tree/master/examples/person_detection">on GitHub</a>. Alternatively, we could also
 <a class="reference external" href="https://tvm.apache.org/docs/how_to/work_with_microtvm/micro_autotune.html">use TVM’s autotuning capabilities</a> to dramatically improve the model’s performance.</p>
-<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  45.460 seconds)</p>
+<p class="sphx-glr-timing"><strong>Total running time of the script:</strong> ( 4 minutes  42.715 seconds)</p>
 <div class="sphx-glr-footer sphx-glr-footer-example docutils container" id="sphx-glr-download-how-to-work-with-microtvm-micro-train-py">
 <div class="sphx-glr-download sphx-glr-download-python docutils container">
 <p><a class="reference download internal" download="" href="../../_downloads/b52cec46baf4f78d6bcd94cbe269c8a6/micro_train.py"><code class="xref download docutils literal notranslate"><span class="pre">Download</span> <span class="pre">Python</span> <span class="pre">source</span> <span class="pre">code:</span> <span class="pre">micro_train.py</span></code></a></p>
diff --git a/docs/how_to/work_with_microtvm/sg_execution_times.html b/docs/how_to/work_with_microtvm/sg_execution_times.html
index 55868eb4ba..2373b0ceb1 100644
--- a/docs/how_to/work_with_microtvm/sg_execution_times.html
+++ b/docs/how_to/work_with_microtvm/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-microtvm-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>06:55.995</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
+<p><strong>06:58.575</strong> total execution time for <strong>how_to_work_with_microtvm</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -349,23 +349,23 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_train.html#sphx-glr-how-to-work-with-microtvm-micro-train-py"><span class="std std-ref">Training Vision Models for microTVM on Arduino</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_train.py</span></code>)</p></td>
-<td><p>04:45.460</p></td>
+<td><p>04:42.715</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_pytorch.html#sphx-glr-how-to-work-with-microtvm-micro-pytorch-py"><span class="std std-ref">microTVM PyTorch Tutorial</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_pytorch.py</span></code>)</p></td>
-<td><p>01:07.816</p></td>
+<td><p>01:10.386</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_autotune.html#sphx-glr-how-to-work-with-microtvm-micro-autotune-py"><span class="std std-ref">Autotuning with microTVM</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_autotune.py</span></code>)</p></td>
-<td><p>00:50.563</p></td>
+<td><p>00:52.417</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_aot.html#sphx-glr-how-to-work-with-microtvm-micro-aot-py"><span class="std std-ref">microTVM Host-Driven AoT</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_aot.py</span></code>)</p></td>
-<td><p>00:08.439</p></td>
+<td><p>00:09.135</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="micro_tflite.html#sphx-glr-how-to-work-with-microtvm-micro-tflite-py"><span class="std std-ref">microTVM with TFLite Models</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_tflite.py</span></code>)</p></td>
-<td><p>00:03.716</p></td>
+<td><p>00:03.923</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="micro_ethosu.html#sphx-glr-how-to-work-with-microtvm-micro-ethosu-py"><span class="std std-ref">Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU with CMSIS-NN</span></a> (<code class="docutils literal notranslate"><span class="pre">micro_ethosu.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_relay/sg_execution_times.html b/docs/how_to/work_with_relay/sg_execution_times.html
index d181ee15df..944c0e13aa 100644
--- a/docs/how_to/work_with_relay/sg_execution_times.html
+++ b/docs/how_to/work_with_relay/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-relay-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:43.054</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
+<p><strong>00:44.712</strong> total execution time for <strong>how_to_work_with_relay</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 84%" />
@@ -349,15 +349,15 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="using_pipeline_executor.html#sphx-glr-how-to-work-with-relay-using-pipeline-executor-py"><span class="std std-ref">Using Pipeline Executor in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_pipeline_executor.py</span></code>)</p></td>
-<td><p>00:31.577</p></td>
+<td><p>00:32.703</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="using_external_lib.html#sphx-glr-how-to-work-with-relay-using-external-lib-py"><span class="std std-ref">Using External Libraries in Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_external_lib.py</span></code>)</p></td>
-<td><p>00:09.963</p></td>
+<td><p>00:10.408</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="build_gcn.html#sphx-glr-how-to-work-with-relay-build-gcn-py"><span class="std std-ref">Building a Graph Convolutional Network</span></a> (<code class="docutils literal notranslate"><span class="pre">build_gcn.py</span></code>)</p></td>
-<td><p>00:01.508</p></td>
+<td><p>00:01.595</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="using_relay_viz.html#sphx-glr-how-to-work-with-relay-using-relay-viz-py"><span class="std std-ref">Use Relay Visualizer to Visualize Relay</span></a> (<code class="docutils literal notranslate"><span class="pre">using_relay_viz.py</span></code>)</p></td>
diff --git a/docs/how_to/work_with_schedules/intrin_math.html b/docs/how_to/work_with_schedules/intrin_math.html
index 7c3b5cc9e4..531e859c95 100644
--- a/docs/how_to/work_with_schedules/intrin_math.html
+++ b/docs/how_to/work_with_schedules/intrin_math.html
@@ -535,7 +535,7 @@ The following example customizes CUDA lowering rule for <code class="code docuti
 <a href="../../reference/api/python/ir.html#tvm.ir.register_intrin_lowering" title="tvm.ir.register_intrin_lowering" class="sphx-glr-backref-module-tvm-ir sphx-glr-backref-type-py-function"><span class="n">register_intrin_lowering</span></a><span class="p">(</span><span class="s2">&quot;tir.exp&quot;</span><span class="p">,</span> <span class="n">target</span><span class="o">=</span><span class="s2">&quot;cuda&quot;</span><span class="p">,</span> <span class="n">f</span><span class="o">= [...]
 </pre></div>
 </div>
-<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f2087e5d9e0&gt;
+<div class="sphx-glr-script-out highlight-none notranslate"><div class="highlight"><pre><span></span>&lt;function my_cuda_math_rule at 0x7f305a4eb560&gt;
 </pre></div>
 </div>
 <p>Register the rule to TVM with the override option to override the existing rule.
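[Editor's note] For context, the rule registered above is a plain Python callback that rewrites a tir.Call into an extern call. A sketch along the lines of the tutorial's my_cuda_math_rule; the trailing level=99 argument is an assumption, since the diff truncates the original registration call:

    import tvm
    from tvm.ir import register_intrin_lowering

    def my_cuda_math_rule(op):
        # Rewrite tir.exp into a call to the CUDA math library.
        assert isinstance(op, tvm.tir.Call)
        name = op.op.name
        assert name.startswith("tir.")
        dispatch_name = name[4:]
        if op.dtype == "float32":
            # float32 dispatches to the single-precision variant, e.g. expf.
            return tvm.tir.call_pure_extern("float32", "%sf" % dispatch_name, op.args[0])
        elif op.dtype == "float64":
            return tvm.tir.call_pure_extern("float64", dispatch_name, op.args[0])
        else:
            return op  # no translation available; keep the call unchanged

    # A level above the default (assumed 99 here) lets this rule take precedence.
    register_intrin_lowering("tir.exp", target="cuda", f=my_cuda_math_rule, level=99)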
diff --git a/docs/how_to/work_with_schedules/sg_execution_times.html b/docs/how_to/work_with_schedules/sg_execution_times.html
index 6fea058f8b..b25531994b 100644
--- a/docs/how_to/work_with_schedules/sg_execution_times.html
+++ b/docs/how_to/work_with_schedules/sg_execution_times.html
@@ -340,7 +340,7 @@
             
   <div class="section" id="computation-times">
 <span id="sphx-glr-how-to-work-with-schedules-sg-execution-times"></span><h1>Computation times<a class="headerlink" href="#computation-times" title="Permalink to this headline">¶</a></h1>
-<p><strong>00:04.522</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
+<p><strong>00:07.862</strong> total execution time for <strong>how_to_work_with_schedules</strong> files:</p>
 <table class="docutils align-default">
 <colgroup>
 <col style="width: 83%" />
@@ -349,27 +349,27 @@
 </colgroup>
 <tbody>
 <tr class="row-odd"><td><p><a class="reference internal" href="intrin_math.html#sphx-glr-how-to-work-with-schedules-intrin-math-py"><span class="std std-ref">Intrinsics and Math Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">intrin_math.py</span></code>)</p></td>
-<td><p>00:02.165</p></td>
+<td><p>00:05.261</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tensorize.html#sphx-glr-how-to-work-with-schedules-tensorize-py"><span class="std std-ref">Use Tensorize to Leverage Hardware Intrinsics</span></a> (<code class="docutils literal notranslate"><span class="pre">tensorize.py</span></code>)</p></td>
-<td><p>00:01.088</p></td>
+<td><p>00:01.222</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="reduction.html#sphx-glr-how-to-work-with-schedules-reduction-py"><span class="std std-ref">Reduction</span></a> (<code class="docutils literal notranslate"><span class="pre">reduction.py</span></code>)</p></td>
-<td><p>00:00.534</p></td>
+<td><p>00:00.583</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="scan.html#sphx-glr-how-to-work-with-schedules-scan-py"><span class="std std-ref">Scan and Recurrent Kernel</span></a> (<code class="docutils literal notranslate"><span class="pre">scan.py</span></code>)</p></td>
-<td><p>00:00.515</p></td>
+<td><p>00:00.571</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="extern_op.html#sphx-glr-how-to-work-with-schedules-extern-op-py"><span class="std std-ref">External Tensor Functions</span></a> (<code class="docutils literal notranslate"><span class="pre">extern_op.py</span></code>)</p></td>
-<td><p>00:00.116</p></td>
+<td><p>00:00.118</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="schedule_primitives.html#sphx-glr-how-to-work-with-schedules-schedule-primitives-py"><span class="std std-ref">Schedule Primitives in TVM</span></a> (<code class="docutils literal notranslate"><span class="pre">schedule_primitives.py</span></code>)</p></td>
-<td><p>00:00.049</p></td>
+<td><p>00:00.051</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="tedd.html#sphx-glr-how-to-work-with-schedules-tedd-py"><span class="std std-ref">Use Tensor Expression Debug Display (TEDD) for Visualization</span></a> (<code class="docutils literal notranslate"><span class="pre">tedd.py</span></code>)</p></td>
@@ -377,7 +377,7 @@
 <td><p>0.0 MB</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="tuple_inputs.html#sphx-glr-how-to-work-with-schedules-tuple-inputs-py"><span class="std std-ref">Compute and Reduce with Tuple Inputs</span></a> (<code class="docutils literal notranslate"><span class="pre">tuple_inputs.py</span></code>)</p></td>
-<td><p>00:00.023</p></td>
+<td><p>00:00.024</p></td>
 <td><p>0.0 MB</p></td>
 </tr>
 </tbody>
diff --git a/docs/how_to/work_with_schedules/tensorize.html b/docs/how_to/work_with_schedules/tensorize.html
index b018978aa0..d37b52d0e8 100644
--- a/docs/how_to/work_with_schedules/tensorize.html
+++ b/docs/how_to/work_with_schedules/tensorize.html
@@ -586,7 +586,7 @@ class Module:
         B_1 = T.match_buffer(B, (512, 64))
         C_1 = T.match_buffer(C, (1024, 512))
         i = T.var(&quot;int32&quot;)
-        T.attr(T.iter_var(i, None, &quot;DataPar&quot;, &quot;&quot;), &quot;pragma_import_llvm&quot;, &quot;; ModuleID = &#39;/tmp/tmpa__vx31f/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpa__vx31f/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca  [...]
+        T.attr(T.iter_var(i, None, &quot;DataPar&quot;, &quot;&quot;), &quot;pragma_import_llvm&quot;, &quot;; ModuleID = &#39;/tmp/tmpk1_7pted/input0.cc&#39;\nsource_filename = \&quot;/tmp/tmpk1_7pted/input0.cc\&quot;\ntarget datalayout = \&quot;e-m:e-i64:64-f80:128-n8:16:32:64-S128\&quot;\ntarget triple = \&quot;x86_64-pc-linux-gnu\&quot;\n\n; Function Attrs: noinline nounwind optnone uwtable\ndefine dso_local i32 @gemv_update(float*, float*, float*, i32, i32, i32) #0 {\n  %7 = alloca  [...]
         for i, j_outer in T.grid(1024, 32):
             T.call_extern(&quot;int32&quot;, &quot;gemv_update&quot;, T.tvm_access_ptr(T.type_annotation(&quot;float32&quot;), C_1.data, i * 512 + j_outer * 16, 16, 2), T.tvm_access_ptr(T.type_annotation(&quot;float32&quot;), A_1.data, i * 64, 64, 1), T.tvm_access_ptr(T.type_annotation(&quot;float32&quot;), B_1.data, j_outer * 1024, 1024, 1), 16, 64, 64)
 </pre></div>
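(Context for the hunk above: only the tempdir name embedded in the imported
LLVM IR string changed. A condensed sketch of the pattern that tensorize page
demonstrates follows, assuming the te/TIR APIs the tutorial uses; the extern
gemv_update symbol is supplied by separately compiled C code and injected
through the pragma_import_llvm attribute visible in the hunk, so this sketch
lowers and prints but does not link on its own.)

    import tvm
    from tvm import te

    def intrin_gemv(m, l):
        # Computation the intrinsic replaces: c[i] = sum_k a[k] * b[i, k]
        a = te.placeholder((l,), name="a")
        b = te.placeholder((m, l), name="b")
        k = te.reduce_axis((0, l), name="k")
        c = te.compute((m,), lambda i: te.sum(a[k] * b[i, k], axis=k), name="c")
        Ab = tvm.tir.decl_buffer(a.shape, a.dtype, name="A", offset_factor=1, strides=[1])
        Bb = tvm.tir.decl_buffer(b.shape, b.dtype, name="B", offset_factor=1,
                                 strides=[te.var("s1"), 1])
        Cb = tvm.tir.decl_buffer(c.shape, c.dtype, name="C", offset_factor=1, strides=[1])

        def intrin_func(ins, outs):
            ib = tvm.tir.ir_builder.create()
            aa, bb = ins
            cc = outs[0]
            # Emits the extern call that appears above as
            # T.call_extern("int32", "gemv_update", ...)
            ib.emit(
                tvm.tir.call_extern(
                    "int32", "gemv_update",
                    cc.access_ptr("w"), aa.access_ptr("r"), bb.access_ptr("r"),
                    m, l, bb.strides[0],
                )
            )
            return ib.get()

        return te.decl_tensor_intrin(c.op, intrin_func, binds={a: Ab, b: Bb, c: Cb})

    # Matmul with the inner (16, 64) tile tensorized into gemv_update,
    # matching the 1024x512x64 shapes in the lowered module above.
    N, M, L = 1024, 512, 64
    A = te.placeholder((N, L), name="A")
    B = te.placeholder((M, L), name="B")
    k = te.reduce_axis((0, L), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[j, k], axis=k), name="C")
    s = te.create_schedule(C.op)
    x, y = C.op.axis
    yo, yi = s[C].split(y, factor=16)
    s[C].tensorize(yi, intrin_gemv(16, L))
    print(tvm.lower(s, [A, B, C], simple_mode=True))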
diff --git a/docs/install/nnpack.html b/docs/install/nnpack.html
index 1ef28de467..23d2181e9d 100644
--- a/docs/install/nnpack.html
+++ b/docs/install/nnpack.html
@@ -229,7 +229,17 @@
               <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
 <ul class="current">
 <li class="toctree-l1 current"><a class="reference internal" href="index.html">Installing TVM</a><ul class="current">
-<li class="toctree-l2"><a class="reference internal" href="from_source.html">Install from Source</a></li>
+<li class="toctree-l2 current"><a class="reference internal" href="from_source.html">Install from Source</a><ul class="current">
+<li class="toctree-l3"><a class="reference internal" href="from_source.html#developers-get-source-from-github">Developers: Get Source from Github</a></li>
+<li class="toctree-l3"><a class="reference internal" href="from_source.html#build-the-shared-library">Build the Shared Library</a></li>
+<li class="toctree-l3"><a class="reference internal" href="from_source.html#python-package-installation">Python Package Installation</a></li>
+<li class="toctree-l3 current"><a class="reference internal" href="from_source.html#install-contrib-libraries">Install Contrib Libraries</a><ul class="current">
+<li class="toctree-l4 current"><a class="current reference internal" href="#">NNPACK Contrib Installation</a></li>
+</ul>
+</li>
+<li class="toctree-l3"><a class="reference internal" href="from_source.html#enable-c-tests">Enable C++ Tests</a></li>
+</ul>
+</li>
 <li class="toctree-l2"><a class="reference internal" href="docker.html">Docker Images</a></li>
 <li class="toctree-l2 current"><a class="current reference internal" href="#">NNPACK Contrib Installation</a><ul>
 <li class="toctree-l3"><a class="reference internal" href="#conditions">Conditions</a></li>
diff --git a/docs/objects.inv b/docs/objects.inv
index 41f3142a66..1a9184f6c2 100644
Binary files a/docs/objects.inv and b/docs/objects.inv differ
diff --git a/docs/reference/api/doxygen/algorithm_8h__incl.svg b/docs/reference/api/doxygen/algorithm_8h__incl.svg
index 1374d088fe..1397c560a0 100644
--- a/docs/reference/api/doxygen/algorithm_8h__incl.svg
+++ b/docs/reference/api/doxygen/algorithm_8h__incl.svg
@@ -1419,30 +1419,30 @@
 <path fill="none" stroke="#191970" d="M2108.3145,-960.2022C1981.1157,-957.6994 1628.4977,-948.078 1337,-915 1082.1873,-886.0849 782.1619,-826.7381 655.4164,-800.4846"/>
 <polygon fill="#191970" stroke="#191970" points="656.0076,-797.0327 645.5047,-798.4257 654.5839,-803.8864 656.0076,-797.0327"/>
 </g>
-<!-- Node79 -->
+<!-- Node78 -->
 <g id="node49" class="node">
-<title>Node79</title>
+<title>Node78</title>
 <polygon fill="#ffffff" stroke="#bfbfbf" points="2334.5,-895.5 2334.5,-914.5 2377.5,-914.5 2377.5,-895.5 2334.5,-895.5"/>
 <text text-anchor="middle" x="2356" y="-902.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">stack</text>
 </g>
-<!-- Node56&#45;&gt;Node79 -->
+<!-- Node56&#45;&gt;Node78 -->
 <g id="edge215" class="edge">
-<title>Node56&#45;&gt;Node79</title>
+<title>Node56&#45;&gt;Node78</title>
 <path fill="none" stroke="#191970" d="M2191.2093,-951.3733C2228.6322,-940.8422 2288.1167,-924.1028 2324.4521,-913.8778"/>
 <polygon fill="#191970" stroke="#191970" points="2325.8142,-917.1305 2334.4922,-911.0524 2323.918,-910.3922 2325.8142,-917.1305"/>
 </g>
-<!-- Node80 -->
+<!-- Node79 -->
 <g id="node50" class="node">
-<title>Node80</title>
+<title>Node79</title>
 <g id="a_node50"><a xlink:href="relay_2type_8h.html" target="_top" xlink:title="Relay typed AST nodes. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="2433.5,-895.5 2433.5,-914.5 2486.5,-914.5 2486.5,-895.5 2433.5,-895.5"/>
 <text text-anchor="middle" x="2460" y="-902.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">./type.h</text>
 </a>
 </g>
 </g>
-<!-- Node56&#45;&gt;Node80 -->
+<!-- Node56&#45;&gt;Node79 -->
 <g id="edge219" class="edge">
-<title>Node56&#45;&gt;Node80</title>
+<title>Node56&#45;&gt;Node79</title>
 <path fill="none" stroke="#191970" d="M2205.6822,-952.0026C2266.3454,-940.791 2368.6519,-921.8828 2423.1825,-911.8045"/>
 <polygon fill="#191970" stroke="#191970" points="2423.9093,-915.2296 2433.1066,-909.9704 2422.637,-908.3462 2423.9093,-915.2296"/>
 </g>
@@ -1737,33 +1737,33 @@
 <path fill="none" stroke="#191970" d="M542.4924,-782.2094C461.9796,-773.7351 301.4887,-756.0216 166,-736 164.7421,-735.8141 163.4692,-735.6222 162.186,-735.4252"/>
 <polygon fill="#191970" stroke="#191970" points="162.647,-731.9546 152.2203,-733.8347 161.5437,-738.8671 162.647,-731.9546"/>
 </g>
-<!-- Node80&#45;&gt;Node1 -->
+<!-- Node79&#45;&gt;Node1 -->
 <g id="edge220" class="edge">
-<title>Node80&#45;&gt;Node1</title>
+<title>Node79&#45;&gt;Node1</title>
 <path fill="none" stroke="#191970" d="M2433.2809,-901.0471C2372.7511,-892.0261 2226.6551,-869.8649 2178,-859 1937.4502,-805.2844 1654.8935,-715.6912 1553.5631,-682.6983"/>
 <polygon fill="#191970" stroke="#191970" points="1554.3657,-679.2786 1543.7733,-679.5031 1552.1937,-685.9332 1554.3657,-679.2786"/>
 </g>
-<!-- Node80&#45;&gt;Node16 -->
+<!-- Node79&#45;&gt;Node16 -->
 <g id="edge223" class="edge">
-<title>Node80&#45;&gt;Node16</title>
+<title>Node79&#45;&gt;Node16</title>
 <path fill="none" stroke="#191970" d="M2486.5214,-904.5449C2698.2359,-900.7441 4106,-872.2835 4106,-787.5 4106,-787.5 4106,-787.5 4106,-133 4106,-95.6561 4086.5493,-85.3061 4054,-67 3986.9936,-29.3149 3749.9905,-18.8755 3663.1746,-16.2812"/>
 <polygon fill="#191970" stroke="#191970" points="3663.2139,-12.781 3653.1187,-15.9961 3663.0154,-19.7781 3663.2139,-12.781"/>
 </g>
-<!-- Node80&#45;&gt;Node51 -->
+<!-- Node79&#45;&gt;Node51 -->
 <g id="edge221" class="edge">
-<title>Node80&#45;&gt;Node51</title>
+<title>Node79&#45;&gt;Node51</title>
 <path fill="none" stroke="#191970" d="M2433.3233,-899.2925C2365.5359,-884.0242 2192,-839.8273 2192,-787.5 2192,-787.5 2192,-787.5 2192,-726 2192,-672.4276 2187.891,-609.3546 2185.5627,-577.9146"/>
 <polygon fill="#191970" stroke="#191970" points="2189.0447,-577.5446 2184.7963,-567.8389 2182.0649,-578.0756 2189.0447,-577.5446"/>
 </g>
-<!-- Node80&#45;&gt;Node52 -->
+<!-- Node79&#45;&gt;Node52 -->
 <g id="edge224" class="edge">
-<title>Node80&#45;&gt;Node52</title>
+<title>Node79&#45;&gt;Node52</title>
 <path fill="none" stroke="#191970" d="M2486.6822,-903.4876C2618.2739,-896.029 3199.3695,-863.0924 3388.3763,-852.3795"/>
 <polygon fill="#191970" stroke="#191970" points="3388.6195,-855.8714 3398.4054,-851.811 3388.2233,-848.8826 3388.6195,-855.8714"/>
 </g>
-<!-- Node80&#45;&gt;Node53 -->
+<!-- Node79&#45;&gt;Node53 -->
 <g id="edge222" class="edge">
-<title>Node80&#45;&gt;Node53</title>
+<title>Node79&#45;&gt;Node53</title>
 <path fill="none" stroke="#191970" d="M2465.3192,-895.3845C2476.0795,-875.9331 2500.5966,-831.6138 2514.6298,-806.2462"/>
 <polygon fill="#191970" stroke="#191970" points="2517.8292,-807.693 2519.6073,-797.2484 2511.704,-804.3045 2517.8292,-807.693"/>
 </g>
diff --git a/docs/reference/api/doxygen/algorithms_8h__incl.svg b/docs/reference/api/doxygen/algorithms_8h__incl.svg
index b1a5ca58d0..853443cf30 100644
--- a/docs/reference/api/doxygen/algorithms_8h__incl.svg
+++ b/docs/reference/api/doxygen/algorithms_8h__incl.svg
@@ -76,9 +76,9 @@
 <path fill="none" stroke="#191970" d="M953.3452,-951.4509C1008.7594,-931.3655 1137.428,-884.7284 1200.2292,-861.9655"/>
 <polygon fill="#191970" stroke="#191970" points="1201.5127,-865.2231 1209.7215,-858.5249 1199.1273,-858.6421 1201.5127,-865.2231"/>
 </g>
-<!-- Node71 -->
+<!-- Node70 -->
 <g id="node48" class="node">
-<title>Node71</title>
+<title>Node70</title>
 <g id="a_node48"><a xlink:href="device__api_8h.html" target="_top" xlink:title="Abstract device memory management API. ">
 <polygon fill="#ffffff" stroke="#000000" points="2018.5,-492.5 2018.5,-522.5 2131.5,-522.5 2131.5,-492.5 2018.5,-492.5"/>
 <text text-anchor="start" x="2026.5" y="-510.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/runtime/device</text>
@@ -86,24 +86,24 @@
 </a>
 </g>
 </g>
-<!-- Node1&#45;&gt;Node71 -->
+<!-- Node1&#45;&gt;Node70 -->
 <g id="edge156" class="edge">
-<title>Node1&#45;&gt;Node71</title>
+<title>Node1&#45;&gt;Node70</title>
 <path fill="none" stroke="#191970" d="M900.4737,-951.3504C872.6009,-939.686 834.9834,-918.654 852,-895 1036.0968,-639.0954 1224.0093,-764.7195 1525,-671 1701.2696,-616.1149 1910.571,-555.0083 2012.6971,-525.4528"/>
 <polygon fill="#191970" stroke="#191970" points="2013.9039,-528.7473 2022.5377,-522.6066 2011.9589,-522.0229 2013.9039,-528.7473"/>
 </g>
-<!-- Node72 -->
+<!-- Node71 -->
 <g id="node49" class="node">
-<title>Node72</title>
+<title>Node71</title>
 <g id="a_node49"><a xlink:href="stmt_8h.html" target="_top" xlink:title="TIR statements. ">
 <polygon fill="#ffffff" stroke="#000000" points="235,-839.5 235,-858.5 319,-858.5 319,-839.5 235,-839.5"/>
 <text text-anchor="middle" x="277" y="-846.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/stmt.h</text>
 </a>
 </g>
 </g>
-<!-- Node1&#45;&gt;Node72 -->
+<!-- Node1&#45;&gt;Node71 -->
 <g id="edge162" class="edge">
-<title>Node1&#45;&gt;Node72</title>
+<title>Node1&#45;&gt;Node71</title>
 <path fill="none" stroke="#191970" d="M870.2864,-957.9687C758.3623,-951.6207 512.8295,-935.7799 431,-915 384.1905,-903.1131 333.3216,-878.8834 303.3482,-863.3209"/>
 <polygon fill="#191970" stroke="#191970" points="304.8614,-860.1622 294.3825,-858.5994 301.5997,-866.3558 304.8614,-860.1622"/>
 </g>
@@ -1348,132 +1348,132 @@
 <path fill="none" stroke="#191970" d="M610.05,-615.4765C574.6612,-601.098 511.9114,-570.3873 479,-523 421.5485,-440.2786 416.269,-313.2171 416.5111,-264.7289"/>
 <polygon fill="#191970" stroke="#191970" points="420.0118,-264.6934 416.6518,-254.6455 413.0125,-264.5957 420.0118,-264.6934"/>
 </g>
-<!-- Node71&#45;&gt;Node10 -->
+<!-- Node70&#45;&gt;Node10 -->
 <g id="edge157" class="edge">
-<title>Node71&#45;&gt;Node10</title>
+<title>Node70&#45;&gt;Node10</title>
 <path fill="none" stroke="#191970" d="M2131.6741,-495.3724C2214.7798,-474.5957 2368.9287,-424.1705 2453,-322 2509.2036,-253.6967 2488.1188,-205.4171 2456,-123 2452.1193,-113.0422 2445.9366,-103.1636 2439.782,-94.762"/>
 <polygon fill="#191970" stroke="#191970" points="2442.3597,-92.3735 2433.4763,-86.5998 2436.8202,-96.6531 2442.3597,-92.3735"/>
 </g>
-<!-- Node71&#45;&gt;Node15 -->
+<!-- Node70&#45;&gt;Node15 -->
 <g id="edge160" class="edge">
-<title>Node71&#45;&gt;Node15</title>
+<title>Node70&#45;&gt;Node15</title>
 <path fill="none" stroke="#191970" d="M2131.5414,-502.1124C2163.1501,-499.1115 2203.2621,-495.3215 2239,-492 2326.543,-483.8637 2555.1223,-496.8049 2633,-456 2716.2103,-412.401 2883.3985,-160.5182 2929.1778,-89.9009"/>
 <polygon fill="#191970" stroke="#191970" points="2932.3249,-91.4793 2934.8104,-81.1801 2926.4448,-87.6814 2932.3249,-91.4793"/>
 </g>
-<!-- Node71&#45;&gt;Node36 -->
+<!-- Node70&#45;&gt;Node36 -->
 <g id="edge158" class="edge">
-<title>Node71&#45;&gt;Node36</title>
+<title>Node70&#45;&gt;Node36</title>
 <path fill="none" stroke="#191970" d="M2075.6766,-492.389C2076.7654,-468.0735 2078.9061,-420.2644 2080.1148,-393.2698"/>
 <polygon fill="#191970" stroke="#191970" points="2083.6191,-393.2507 2080.57,-383.1042 2076.6261,-392.9376 2083.6191,-393.2507"/>
 </g>
-<!-- Node71&#45;&gt;Node42 -->
+<!-- Node70&#45;&gt;Node42 -->
 <g id="edge159" class="edge">
-<title>Node71&#45;&gt;Node42</title>
+<title>Node70&#45;&gt;Node42</title>
 <path fill="none" stroke="#191970" d="M2055.7122,-492.2967C2044.301,-483.302 2029.6693,-471.7687 2017.1104,-461.8694"/>
 <polygon fill="#191970" stroke="#191970" points="2019.2317,-459.0849 2009.2115,-455.6432 2014.8984,-464.5824 2019.2317,-459.0849"/>
 </g>
-<!-- Node72&#45;&gt;Node16 -->
+<!-- Node71&#45;&gt;Node16 -->
 <g id="edge176" class="edge">
-<title>Node72&#45;&gt;Node16</title>
+<title>Node71&#45;&gt;Node16</title>
 <path fill="none" stroke="#191970" d="M234.8638,-844.3361C175.9242,-835.21 76,-809.1053 76,-737 76,-737 76,-737 76,-569 76,-416.3464 190,-397.6536 190,-245 190,-245 190,-245 190,-189 190,-113.6415 1402.774,-78.9474 1653.1223,-72.5884"/>
 <polygon fill="#191970" stroke="#191970" points="1653.3991,-76.0826 1663.3078,-72.3319 1653.2228,-69.0849 1653.3991,-76.0826"/>
 </g>
-<!-- Node72&#45;&gt;Node17 -->
+<!-- Node71&#45;&gt;Node17 -->
 <g id="edge177" class="edge">
-<title>Node72&#45;&gt;Node17</title>
+<title>Node71&#45;&gt;Node17</title>
 <path fill="none" stroke="#191970" d="M234.7845,-843.0281C180.8774,-834.8821 91.0888,-819.4129 62,-803 26.9485,-783.2227 0,-777.2462 0,-737 0,-737 0,-737 0,-189 0,-154.4675 11.6029,-141.1191 41,-123 86.9958,-94.6502 465.1786,-77.3872 580.0409,-72.7609"/>
 <polygon fill="#191970" stroke="#191970" points="580.4615,-76.2471 590.3146,-72.3527 580.1835,-69.2526 580.4615,-76.2471"/>
 </g>
-<!-- Node72&#45;&gt;Node19 -->
+<!-- Node71&#45;&gt;Node19 -->
 <g id="edge178" class="edge">
-<title>Node72&#45;&gt;Node19</title>
+<title>Node71&#45;&gt;Node19</title>
 <path fill="none" stroke="#191970" d="M261.8458,-839.289C236.9197,-821.9931 190,-783.2886 190,-737 190,-737 190,-737 190,-569 190,-358.6632 389.6326,-394.0436 573,-291 597.5689,-277.1935 627.1176,-265.0616 649.4001,-256.7282"/>
 <polygon fill="#191970" stroke="#191970" points="650.9011,-259.9057 659.0827,-253.1743 648.4891,-253.3343 650.9011,-259.9057"/>
 </g>
-<!-- Node72&#45;&gt;Node15 -->
+<!-- Node71&#45;&gt;Node15 -->
 <g id="edge175" class="edge">
-<title>Node72&#45;&gt;Node15</title>
+<title>Node71&#45;&gt;Node15</title>
 <path fill="none" stroke="#191970" d="M319.2097,-848.5169C671.5746,-844.3659 3114.5607,-813.2517 3253,-747 3287.9613,-730.2689 3310,-719.7585 3310,-681 3310,-681 3310,-681 3310,-189 3310,-154.6999 3298.4898,-142.1004 3270,-123 3221.7134,-90.6272 3046.2536,-77.3347 2973.2306,-73.1227"/>
 <polygon fill="#191970" stroke="#191970" points="2973.4064,-69.6272 2963.2273,-72.566 2973.0173,-76.6164 2973.4064,-69.6272"/>
 </g>
-<!-- Node73 -->
+<!-- Node72 -->
 <g id="node50" class="node">
-<title>Node73</title>
+<title>Node72</title>
 <g id="a_node50"><a xlink:href="tir_2expr_8h.html" target="_top" xlink:title="TIR expressions. ">
 <polygon fill="#ffffff" stroke="#ff0000" points="1242.5,-783.5 1242.5,-802.5 1325.5,-802.5 1325.5,-783.5 1242.5,-783.5"/>
 <text text-anchor="middle" x="1284" y="-790.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/expr.h</text>
 </a>
 </g>
 </g>
-<!-- Node72&#45;&gt;Node73 -->
+<!-- Node71&#45;&gt;Node72 -->
 <g id="edge163" class="edge">
-<title>Node72&#45;&gt;Node73</title>
+<title>Node71&#45;&gt;Node72</title>
 <path fill="none" stroke="#191970" d="M319.3064,-846.6473C480.53,-837.6815 1056.2038,-805.6679 1232.3883,-795.8702"/>
 <polygon fill="#191970" stroke="#191970" points="1232.6451,-799.3614 1242.4353,-795.3114 1232.2563,-792.3722 1232.6451,-799.3614"/>
 </g>
-<!-- Node73&#45;&gt;Node2 -->
+<!-- Node72&#45;&gt;Node2 -->
 <g id="edge164" class="edge">
-<title>Node73&#45;&gt;Node2</title>
+<title>Node72&#45;&gt;Node2</title>
 <path fill="none" stroke="#191970" d="M1325.5069,-787.2179C1399.9016,-776.8545 1555.6525,-755.1579 1635.8745,-743.9827"/>
 <polygon fill="#191970" stroke="#191970" points="1636.7875,-747.3894 1646.2089,-742.543 1635.8216,-740.4563 1636.7875,-747.3894"/>
 </g>
-<!-- Node73&#45;&gt;Node4 -->
+<!-- Node72&#45;&gt;Node4 -->
 <g id="edge165" class="edge">
-<title>Node73&#45;&gt;Node4</title>
+<title>Node72&#45;&gt;Node4</title>
 <path fill="none" stroke="#191970" d="M1305.8153,-783.4033C1338.6773,-769.274 1403.0971,-742.8396 1460,-727 1750.2937,-646.1931 2108.1326,-594.8034 2247.9966,-576.5088"/>
 <polygon fill="#191970" stroke="#191970" points="2248.6176,-579.9576 2258.0829,-575.1976 2247.7151,-573.016 2248.6176,-579.9576"/>
 </g>
-<!-- Node73&#45;&gt;Node33 -->
+<!-- Node72&#45;&gt;Node33 -->
 <g id="edge167" class="edge">
-<title>Node73&#45;&gt;Node33</title>
+<title>Node72&#45;&gt;Node33</title>
 <path fill="none" stroke="#191970" d="M1269.9202,-783.4001C1246.2399,-765.9894 1201,-726.7414 1201,-681 1201,-681 1201,-681 1201,-507.5 1201,-467.0993 1220.2594,-423.5127 1234.1209,-397.3742"/>
 <polygon fill="#191970" stroke="#191970" points="1237.2076,-399.0245 1238.9379,-388.5719 1231.067,-395.664 1237.2076,-399.0245"/>
 </g>
-<!-- Node73&#45;&gt;Node25 -->
+<!-- Node72&#45;&gt;Node25 -->
 <g id="edge170" class="edge">
-<title>Node73&#45;&gt;Node25</title>
+<title>Node72&#45;&gt;Node25</title>
 <path fill="none" stroke="#191970" d="M1242.3923,-787.8465C1102.3251,-769.9652 649.7003,-707.7005 524,-635 419.2951,-574.4425 369.038,-566.1205 319,-456 277.6393,-364.9761 284.6167,-299.5107 361,-235 394.6952,-206.5422 693.8468,-194.0087 806.4167,-190.2707"/>
 <polygon fill="#191970" stroke="#191970" points="806.8318,-193.7591 816.7129,-189.9361 806.6045,-186.7628 806.8318,-193.7591"/>
 </g>
-<!-- Node73&#45;&gt;Node17 -->
+<!-- Node72&#45;&gt;Node17 -->
 <g id="edge174" class="edge">
-<title>Node73&#45;&gt;Node17</title>
+<title>Node72&#45;&gt;Node17</title>
 <path fill="none" stroke="#191970" d="M1242.223,-791.7079C1112.5717,-787.4635 718.2808,-772.7289 593,-747 482.6041,-724.328 212.638,-607.442 138,-523 90.3617,-469.1042 76,-445.4317 76,-373.5 76,-373.5 76,-373.5 76,-189 76,-157.7875 75.3623,-142.1626 100,-123 137.9116,-93.5134 472.8436,-77.27 580.1074,-72.79"/>
 <polygon fill="#191970" stroke="#191970" points="580.4841,-76.2776 590.332,-72.37 580.1968,-69.2835 580.4841,-76.2776"/>
 </g>
-<!-- Node73&#45;&gt;Node10 -->
+<!-- Node72&#45;&gt;Node10 -->
 <g id="edge166" class="edge">
-<title>Node73&#45;&gt;Node10</title>
+<title>Node72&#45;&gt;Node10</title>
 <path fill="none" stroke="#191970" d="M1325.6383,-788.8395C1345.9435,-786.8976 1370.7328,-784.6552 1393,-783 1751.4817,-756.3533 2668.7226,-809.7857 3008,-691 3107.4191,-656.192 3158,-612.8364 3158,-507.5 3158,-507.5 3158,-507.5 3158,-440.5 3158,-145.9929 2682.0182,-86.5293 2494.572,-74.5305"/>
 <polygon fill="#191970" stroke="#191970" points="2494.7186,-71.033 2484.5234,-73.9152 2494.2907,-78.0199 2494.7186,-71.033"/>
 </g>
-<!-- Node73&#45;&gt;Node15 -->
+<!-- Node72&#45;&gt;Node15 -->
 <g id="edge172" class="edge">
-<title>Node73&#45;&gt;Node15</title>
+<title>Node72&#45;&gt;Node15</title>
 <path fill="none" stroke="#191970" d="M1325.618,-788.5337C1345.9185,-786.5207 1370.7097,-784.3068 1393,-783 2016.1351,-746.4667 2177.5767,-824.1157 2797,-747 3016.7817,-719.6381 3234,-728.9784 3234,-507.5 3234,-507.5 3234,-507.5 3234,-189 3234,-149.3572 3208.2988,-142.8783 3174,-123 3140.0746,-103.3381 3028.9196,-84.5946 2973.3003,-76.1622"/>
 <polygon fill="#191970" stroke="#191970" points="2973.6817,-72.6804 2963.274,-74.6628 2972.6464,-79.6034 2973.6817,-72.6804"/>
 </g>
-<!-- Node73&#45;&gt;Node34 -->
+<!-- Node72&#45;&gt;Node34 -->
 <g id="edge169" class="edge">
-<title>Node73&#45;&gt;Node34</title>
+<title>Node72&#45;&gt;Node34</title>
 <path fill="none" stroke="#191970" d="M1297.5183,-783.4215C1317.9661,-769.3154 1358.375,-742.9103 1396,-727 1474.9494,-693.6151 2070.6019,-573.3137 2140,-523 2211.1566,-471.4114 2183.1211,-416.1775 2249,-358 2268.0347,-341.1905 2293.5357,-328.4495 2314.5219,-319.8454"/>
 <polygon fill="#191970" stroke="#191970" points="2316.0878,-322.9903 2324.1094,-316.069 2313.5224,-316.4774 2316.0878,-322.9903"/>
 </g>
-<!-- Node73&#45;&gt;Node27 -->
+<!-- Node72&#45;&gt;Node27 -->
 <g id="edge168" class="edge">
-<title>Node73&#45;&gt;Node27</title>
+<title>Node72&#45;&gt;Node27</title>
 <path fill="none" stroke="#191970" d="M1283.3032,-783.1593C1281.5327,-757.2741 1277,-685.1355 1277,-625 1277,-625 1277,-625 1277,-569 1277,-443.6613 1428.1722,-360.7913 1508.3602,-325.6501"/>
 <polygon fill="#191970" stroke="#191970" points="1509.9691,-328.7682 1517.7724,-321.6016 1507.2031,-322.3378 1509.9691,-328.7682"/>
 </g>
-<!-- Node73&#45;&gt;Node32 -->
+<!-- Node72&#45;&gt;Node32 -->
 <g id="edge173" class="edge">
-<title>Node73&#45;&gt;Node32</title>
+<title>Node72&#45;&gt;Node32</title>
 <path fill="none" stroke="#191970" d="M1242.495,-792.6584C1120.949,-791.2326 767.536,-783.9352 659,-747 491.8354,-690.1133 342,-684.0789 342,-507.5 342,-507.5 342,-507.5 342,-373.5 342,-335.3133 345.7158,-323.3539 366,-291 373.3233,-279.3191 384.4352,-268.8059 394.4194,-260.7885"/>
 <polygon fill="#191970" stroke="#191970" points="396.6088,-263.5205 402.4329,-254.6701 392.3608,-257.9567 396.6088,-263.5205"/>
 </g>
-<!-- Node73&#45;&gt;Node46 -->
+<!-- Node72&#45;&gt;Node46 -->
 <g id="edge171" class="edge">
-<title>Node73&#45;&gt;Node46</title>
+<title>Node72&#45;&gt;Node46</title>
 <path fill="none" stroke="#191970" d="M1325.7122,-784.4292C1391.6287,-771.0598 1524.0057,-744.9184 1637,-727 2175.2038,-641.6526 2413.2178,-854.1301 2846,-523 2888.7746,-490.2724 2907.473,-425.6125 2914.4475,-393.1157"/>
 <polygon fill="#191970" stroke="#191970" points="2917.928,-393.5582 2916.4458,-383.0675 2911.0624,-392.1927 2917.928,-393.5582"/>
 </g>
diff --git a/docs/reference/api/doxygen/annotated.html b/docs/reference/api/doxygen/annotated.html
index c3d85a3390..a0d24eeac2 100644
--- a/docs/reference/api/doxygen/annotated.html
+++ b/docs/reference/api/doxygen/annotated.html
@@ -344,217 +344,220 @@ $(function() {
 <tr id="row_1_6_33_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Clause.html" target="_self">Clause</a></td><td class="desc"></td></tr>
 <tr id="row_1_6_34_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ClauseNode.html" target="_self">ClauseNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Clause.html">Clause</a> container node </td></tr>
 <tr id="row_1_6_35_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ClipAttrs.html" target="_self">ClipAttrs</a></td><td class="desc">Attributes for Clip operator </td></tr>
-<tr id="row_1_6_36_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1CompilerAttrs.html" target="_self">CompilerAttrs</a></td><td class="desc">Options for the operators used to annotate a compiler </td></tr>
-<tr id="row_1_6_37_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ConcatenateAttrs.html" target="_self">ConcatenateAttrs</a></td><td class="desc">Attributes used in concatenate operators </td></tr>
-<tr id="row_1_6_38_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Constant.html" target="_self">Constant</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_39_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstantNode.html" target="_self">ConstantNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Constant.html">Constant</a> tensor type </td></tr>
-<tr id="row_1_6_40_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstantPattern.html" target="_self">ConstantPattern</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_41_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstantPatternNode.html" target="_self">ConstantPatternNode</a></td><td class="desc">Container for <a class="el" href="classtvm_1_1relay_1_1Constant.html">Constant</a> </td></tr>
-<tr id="row_1_6_42_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstructorValue.html" target="_self">ConstructorValue</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_43_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ConstructorValueObj.html" target="_self">ConstructorValueObj</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_44_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv1DAttrs.html" target="_self">Conv1DAttrs</a></td><td class="desc">Attributes used in 1D convolution operators </td></tr>
-<tr id="row_1_6_45_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv1DTransposeAttrs.html" target="_self">Conv1DTransposeAttrs</a></td><td class="desc">Attributes used in 1D transposed convolution operator </td></tr>
-<tr id="row_1_6_46_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv2DAttrs.html" target="_self">Conv2DAttrs</a></td><td class="desc">Attributes used in convolution operators </td></tr>
-<tr id="row_1_6_47_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv2DTransposeAttrs.html" target="_self">Conv2DTransposeAttrs</a></td><td class="desc">Attributes used in transposed convolution operator </td></tr>
-<tr id="row_1_6_48_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradAttrs.html" target="_self">Conv2DWinogradAttrs</a></td><td class="desc">Attributes used in convolution operators with winograd algorithm </td></tr>
-<tr id="row_1_6_49_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradNNPACKWeightTransformAttrs.html" target="_self">Conv2DWinogradNNPACKWeightTransformAttrs</a></td><td class="desc">Attributes used in winograd weight transformation operators </td></tr>
-<tr id="row_1_6_50_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv3DAttrs.html" target="_self">Conv3DAttrs</a></td><td class="desc">Attributes used in convolution operators </td></tr>
-<tr id="row_1_6_51_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv3DTransposeAttrs.html" target="_self">Conv3DTransposeAttrs</a></td><td class="desc">Attributes used in transposed convolution operator </td></tr>
-<tr id="row_1_6_52_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv3DWinogradAttrs.html" target="_self">Conv3DWinogradAttrs</a></td><td class="desc">Attributes used in 3d winograd convolution operators </td></tr>
-<tr id="row_1_6_53_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ConvGemmWeightTransformAttrs.html" target="_self">ConvGemmWeightTransformAttrs</a></td><td class="desc">Attributes used in gemm weight transformation operators </td></tr>
-<tr id="row_1_6_54_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ConvWinogradWeightTransformAttrs.html" target="_self">ConvWinogradWeightTransformAttrs</a></td><td class="desc">Attributes used in winograd weight transformation operators </td></tr>
-<tr id="row_1_6_55_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1CorrelationAttrs.html" target="_self">CorrelationAttrs</a></td><td class="desc">Attributes used in correlation operators </td></tr>
-<tr id="row_1_6_56_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1CropAndResizeAttrs.html" target="_self">CropAndResizeAttrs</a></td><td class="desc">Attributes used in image crop_and_resize operator </td></tr>
-<tr id="row_1_6_57_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DataTypePattern.html" target="_self">DataTypePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
-<tr id="row_1_6_58_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DataTypePatternNode.html" target="_self">DataTypePatternNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> for Types </td></tr>
-<tr id="row_1_6_59_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DebugAttrs.html" target="_self">DebugAttrs</a></td><td class="desc">Options for the debug operators </td></tr>
-<tr id="row_1_6_60_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DeformableConv2DAttrs.html" target="_self">DeformableConv2DAttrs</a></td><td class="desc">Attributes for DeformableConv2D operator </td></tr>
-<tr id="row_1_6_61_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DenseAttrs.html" target="_self">DenseAttrs</a></td><td class="desc">Attributes for dense operator </td></tr>
-<tr id="row_1_6_62_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DensePackAttrs.html" target="_self">DensePackAttrs</a></td><td class="desc">Attributes for dense_pack operator </td></tr>
-<tr id="row_1_6_63_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DeviceCopyAttrs.html" target="_self">DeviceCopyAttrs</a></td><td class="desc">Options for the device copy operators </td></tr>
-<tr id="row_1_6_64_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPattern.html" target="_self">DFPattern</a></td><td class="desc">Managed reference to dataflow patterns </td></tr>
-<tr id="row_1_6_65_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternCallback.html" target="_self">DFPatternCallback</a></td><td class="desc">Managed reference to dataflow pattern callbacks </td></tr>
-<tr id="row_1_6_66_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternCallbackNode.html" target="_self">DFPatternCallbackNode</a></td><td class="desc">Base type of all dataflow pattern callbacks </td></tr>
-<tr id="row_1_6_67_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor.html" target="_self">DFPatternFunctor</a></td><td class="desc">A dynamical functor that dispatches on in the first <a class="el" href="classtvm_1_1relay_1_1DFPattern.html" title="Managed reference to dataflow patterns. ">DFPattern</a> argument </ [...]
-<tr id="row_1_6_68_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor_3_01R_07const_01DFPattern_01_6n_00_01Args_8_8_8_08_4.html" target="_self">DFPatternFunctor&lt; R(const DFPattern &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_69_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternNode.html" target="_self">DFPatternNode</a></td><td class="desc">Base type of all dataflow patterns </td></tr>
-<tr id="row_1_6_70_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternVisitor.html" target="_self">DFPatternVisitor</a></td><td class="desc">A simple visitor wrapper around <a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor.html" title="A dynamical functor that dispatches on in the first DFPattern argument. ">DFPatt [...]
-<tr id="row_1_6_71_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DilateAttrs.html" target="_self">DilateAttrs</a></td><td class="desc">Attributes used in dilate operator </td></tr>
-<tr id="row_1_6_72_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Dilation2DAttrs.html" target="_self">Dilation2DAttrs</a></td><td class="desc">Attributes used in dilation operators </td></tr>
-<tr id="row_1_6_73_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DominatorPattern.html" target="_self">DominatorPattern</a></td><td class="desc">A pattern which matches a variable length dominator path </td></tr>
-<tr id="row_1_6_74_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DominatorPatternNode.html" target="_self">DominatorPatternNode</a></td><td class="desc">Dominated Graph <a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> <a class="el" href="cla [...]
-<tr id="row_1_6_75_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DropoutAttrs.html" target="_self">DropoutAttrs</a></td><td class="desc">Attributes used in dropout operator </td></tr>
-<tr id="row_1_6_76_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DynExpandDimsAttrs.html" target="_self">DynExpandDimsAttrs</a></td><td class="desc">Attributes used in dynamic expand_dims operators </td></tr>
-<tr id="row_1_6_77_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1EinsumAttrs.html" target="_self">EinsumAttrs</a></td><td class="desc">Attributes used in einsum operator </td></tr>
-<tr id="row_1_6_78_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Executor.html" target="_self">Executor</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1relay_1_1ExecutorNode.html" title="Executor information. ">ExecutorNode</a> </td></tr>
-<tr id="row_1_6_79_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExecutorNode.html" target="_self">ExecutorNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Executor.html" title="Managed reference class to ExecutorNode. ">Executor</a> information </td></tr>
-<tr id="row_1_6_80_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExecutorRegEntry.html" target="_self">ExecutorRegEntry</a></td><td class="desc">Helper structure to register Executors </td></tr>
-<tr id="row_1_6_81_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ExpandDimsAttrs.html" target="_self">ExpandDimsAttrs</a></td><td class="desc">Attributes used in expand_dims operators </td></tr>
-<tr id="row_1_6_82_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html" target="_self">ExprFunctor</a></td><td class="desc">A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerful Visitor, since it allows you to define function signatures of Visit <a class="el" href="cl [...]
-<tr id="row_1_6_83_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html" target="_self">ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_84_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprMutator.html" target="_self">ExprMutator</a></td><td class="desc">A wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html" title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerfu...">Expr [...]
-<tr id="row_1_6_85_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprPattern.html" target="_self">ExprPattern</a></td><td class="desc">A pattern which matches a literal expression </td></tr>
-<tr id="row_1_6_86_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprPatternNode.html" target="_self">ExprPatternNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> for Relay Expression </td></tr>
-<tr id="row_1_6_87_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprRewriter.html" target="_self">ExprRewriter</a></td><td class="desc">A non-iterating Expression Rewriter </td></tr>
-<tr id="row_1_6_88_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html" target="_self">ExprVisitor</a></td><td class="desc">A simple visitor wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html" title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more p [...]
-<tr id="row_1_6_89_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FeatureSet.html" target="_self">FeatureSet</a></td><td class="desc">A finite set of Feature </td></tr>
-<tr id="row_1_6_90_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1FIFOBufferAttrs.html" target="_self">FIFOBufferAttrs</a></td><td class="desc">Attributes for FIFO buffer operator </td></tr>
-<tr id="row_1_6_91_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1FixedPointMultiplyAttrs.html" target="_self">FixedPointMultiplyAttrs</a></td><td class="desc">Attributes for FixedPointMultiply operator </td></tr>
-<tr id="row_1_6_92_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1FixedPointMultiplyPerAxisAttrs.html" target="_self">FixedPointMultiplyPerAxisAttrs</a></td><td class="desc">Attributes for per channel/per axes FixedPointMultiply operator </td></tr>
-<tr id="row_1_6_93_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Function.html" target="_self">Function</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" title="Relay Function container. ">FunctionNode</a> </td></tr>
-<tr id="row_1_6_94_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" target="_self">FunctionNode</a></td><td class="desc">Relay <a class="el" href="classtvm_1_1relay_1_1Function.html" title="Managed reference to FunctionNode. ">Function</a> container </td></tr>
-<tr id="row_1_6_95_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FunctionPattern.html" target="_self">FunctionPattern</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" title="Relay Function container. ">FunctionNode</a> </td></tr>
-<tr id="row_1_6_96_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FunctionPatternNode.html" target="_self">FunctionPatternNode</a></td><td class="desc">Relay <a class="el" href="classtvm_1_1relay_1_1Function.html" title="Managed reference to FunctionNode. ">Function</a> container </td></tr>
-<tr id="row_1_6_97_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GatherAttrs.html" target="_self">GatherAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_98_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GatherNDAttrs.html" target="_self">GatherNDAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_99_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GetValidCountsAttrs.html" target="_self">GetValidCountsAttrs</a></td><td class="desc">Attributes used in get_valid_counts operator </td></tr>
-<tr id="row_1_6_100_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GlobalPool2DAttrs.html" target="_self">GlobalPool2DAttrs</a></td><td class="desc">Attributes for global pool operator </td></tr>
-<tr id="row_1_6_101_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GridSampleAttrs.html" target="_self">GridSampleAttrs</a></td><td class="desc">Attributes used in image grid_sample operator </td></tr>
-<tr id="row_1_6_102_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GroupNormAttrs.html" target="_self">GroupNormAttrs</a></td><td class="desc">Attributes used in group_norm operator </td></tr>
-<tr id="row_1_6_103_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Id.html" target="_self">Id</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_104_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IdNode.html" target="_self">IdNode</a></td><td class="desc">The unique identifier of variables </td></tr>
-<tr id="row_1_6_105_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1If.html" target="_self">If</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_106_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IfNode.html" target="_self">IfNode</a></td><td class="desc">Container of <a class="el" href="classtvm_1_1relay_1_1If.html">If</a> </td></tr>
-<tr id="row_1_6_107_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IfPattern.html" target="_self">IfPattern</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_108_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IfPatternNode.html" target="_self">IfPatternNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_109_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1InitOpAttrs.html" target="_self">InitOpAttrs</a></td><td class="desc">Attributes that specify a tensor </td></tr>
-<tr id="row_1_6_110_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1InstanceNormAttrs.html" target="_self">InstanceNormAttrs</a></td><td class="desc">Attributes used in instance_norm operator </td></tr>
-<tr id="row_1_6_111_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1InterpreterClosure.html" target="_self">InterpreterClosure</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_112_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1InterpreterClosureObj.html" target="_self">InterpreterClosureObj</a></td><td class="desc">The container type of Closures used by the interpreter </td></tr>
-<tr id="row_1_6_113_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1L2NormalizeAttrs.html" target="_self">L2NormalizeAttrs</a></td><td class="desc">Attributes for L2Normalize operator </td></tr>
-<tr id="row_1_6_114_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LayerNormAttrs.html" target="_self">LayerNormAttrs</a></td><td class="desc">Attributes used in layer_norm operator </td></tr>
-<tr id="row_1_6_115_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LayoutTransformAttrs.html" target="_self">LayoutTransformAttrs</a></td><td class="desc">Attributes for LayoutTransform operator </td></tr>
-<tr id="row_1_6_116_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LeakyReluAttrs.html" target="_self">LeakyReluAttrs</a></td><td class="desc">Attributes for leaky relu operator </td></tr>
-<tr id="row_1_6_117_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Let.html" target="_self">Let</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_118_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1LetNode.html" target="_self">LetNode</a></td><td class="desc">A binding of a sub-network </td></tr>
-<tr id="row_1_6_119_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1LetPattern.html" target="_self">LetPattern</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Let.html">Let</a> binding that binds a local var </td></tr>
-<tr id="row_1_6_120_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1LetPatternNode.html" target="_self">LetPatternNode</a></td><td class="desc">A binding of a sub-network </td></tr>
-<tr id="row_1_6_121_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LRNAttrs.html" target="_self">LRNAttrs</a></td><td class="desc">Attributes for LRN operator </td></tr>
-<tr id="row_1_6_122_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Match.html" target="_self">Match</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_123_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MatchNode.html" target="_self">MatchNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Match.html">Match</a> container node </td></tr>
-<tr id="row_1_6_124_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MatmulAttrs.html" target="_self">MatmulAttrs</a></td><td class="desc">Attributes for matmul operator </td></tr>
-<tr id="row_1_6_125_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MatrixSetDiagAttrs.html" target="_self">MatrixSetDiagAttrs</a></td><td class="desc">Attributes used in matrix_set_diag operator </td></tr>
-<tr id="row_1_6_126_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool1DAttrs.html" target="_self">MaxPool1DAttrs</a></td><td class="desc">Attributes for 1D max pool operator </td></tr>
-<tr id="row_1_6_127_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html" target="_self">MaxPool2DAttrs</a></td><td class="desc">Attributes for max pool operator </td></tr>
-<tr id="row_1_6_128_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool3DAttrs.html" target="_self">MaxPool3DAttrs</a></td><td class="desc">Attributes for 3D max pool operator </td></tr>
-<tr id="row_1_6_129_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MeshgridAttrs.html" target="_self">MeshgridAttrs</a></td><td class="desc">Attributes used in meshgrid operators </td></tr>
-<tr id="row_1_6_130_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MetaScheduleLayoutTransformAttrs.html" target="_self">MetaScheduleLayoutTransformAttrs</a></td><td class="desc">Attributes for MetaScheduleLayoutTransform operator </td></tr>
-<tr id="row_1_6_131_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MirrorPadAttrs.html" target="_self">MirrorPadAttrs</a></td><td class="desc">Attributes used for the MirrorPadding operator </td></tr>
-<tr id="row_1_6_132_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html" target="_self">MixedModeMutator</a></td><td class="desc">Non-recursive DFS Graph Traversal for Custom Rewriting Passes </td></tr>
-<tr id="row_1_6_133_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MixedModeVisitor.html" target="_self">MixedModeVisitor</a></td><td class="desc">A wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html" title="A simple visitor wrapper around ExprFunctor. Recursively visit the content. ">ExprVisitor</a> which [...]
-<tr id="row_1_6_134_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MultiBoxPriorAttrs.html" target="_self">MultiBoxPriorAttrs</a></td><td class="desc">Attributes used in multibox_prior operators </td></tr>
-<tr id="row_1_6_135_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MultiBoxTransformLocAttrs.html" target="_self">MultiBoxTransformLocAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_136_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MultinomialAttrs.html" target="_self">MultinomialAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_137_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NdarraySizeAttrs.html" target="_self">NdarraySizeAttrs</a></td><td class="desc">Attributes for ndarray_size operator </td></tr>
-<tr id="row_1_6_138_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NLLLossAttrs.html" target="_self">NLLLossAttrs</a></td><td class="desc">Attributes used in NLLLoss operator </td></tr>
-<tr id="row_1_6_139_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NonMaximumSuppressionAttrs.html" target="_self">NonMaximumSuppressionAttrs</a></td><td class="desc">Attributes used in non_maximum_suppression operator </td></tr>
-<tr id="row_1_6_140_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NormalAttrs.html" target="_self">NormalAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_141_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1OnDeviceAttrs.html" target="_self">OnDeviceAttrs</a></td><td class="desc">Attributes for the "on_device" annotation (ie operator) </td></tr>
-<tr id="row_1_6_142_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1OneHotAttrs.html" target="_self">OneHotAttrs</a></td><td class="desc">Attributes used in one-hot operator </td></tr>
-<tr id="row_1_6_143_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html" target="_self">OpImplementation</a></td><td class="desc">Operator implementation class </td></tr>
-<tr id="row_1_6_144_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpImplementationNode.html" target="_self">OpImplementationNode</a></td><td class="desc">Operator implementation that includes compute and schedule function </td></tr>
-<tr id="row_1_6_145_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpSpecialization.html" target="_self">OpSpecialization</a></td><td class="desc">Operator specialization class </td></tr>
-<tr id="row_1_6_146_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpSpecializationNode.html" target="_self">OpSpecializationNode</a></td><td class="desc">Specialized implementations for operators under certain conditions </td></tr>
-<tr id="row_1_6_147_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpStrategy.html" target="_self">OpStrategy</a></td><td class="desc">Operator strategy class </td></tr>
-<tr id="row_1_6_148_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpStrategyNode.html" target="_self">OpStrategyNode</a></td><td class="desc">Operator strategy to choose implementation </td></tr>
-<tr id="row_1_6_149_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1PadAttrs.html" target="_self">PadAttrs</a></td><td class="desc">Attributes used for the padding operator </td></tr>
-<tr id="row_1_6_150_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Pattern.html" target="_self">Pattern</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> is the base type for an ADT match pattern in Relay </td></tr>
-<tr id="row_1_6_151_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternConstructor.html" target="_self">PatternConstructor</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_152_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternConstructorNode.html" target="_self">PatternConstructorNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
-<tr id="row_1_6_153_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html" target="_self">PatternFunctor</a></td><td class="desc">A dynamical functor on ADT patterns that dispatches on its first argument. You can use this as a more powerful visitor, since it allows you to define the types of further arguments to Vi [...]
-<tr id="row_1_6_154_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html" target="_self">PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_155_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternMutator.html" target="_self">PatternMutator</a></td><td class="desc">A wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html" title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerfu.. [...]
-<tr id="row_1_6_156_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternNode.html" target="_self">PatternNode</a></td><td class="desc">Base type for declaring relay pattern </td></tr>
-<tr id="row_1_6_157_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternTuple.html" target="_self">PatternTuple</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_158_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternTupleNode.html" target="_self">PatternTupleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
-<tr id="row_1_6_159_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVar.html" target="_self">PatternVar</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_160_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVarNode.html" target="_self">PatternVarNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
-<tr id="row_1_6_161_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html" target="_self">PatternVisitor</a></td><td class="desc">A simple visitor wrapper around <a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html" title="A dynamical functor on ADT patterns that dispatches on its first argument. You can us [...]
-<tr id="row_1_6_162_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html" target="_self">PatternWildcard</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_163_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternWildcardNode.html" target="_self">PatternWildcardNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html">PatternWildcard</a> container node </td></tr>
-<tr id="row_1_6_164_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1PReluAttrs.html" target="_self">PReluAttrs</a></td><td class="desc">Attributes for prelu operator </td></tr>
-<tr id="row_1_6_165_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ProposalAttrs.html" target="_self">ProposalAttrs</a></td><td class="desc">Attributes used in proposal operators </td></tr>
-<tr id="row_1_6_166_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RecClosure.html" target="_self">RecClosure</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_167_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RecClosureObj.html" target="_self">RecClosureObj</a></td><td class="desc">The container type of <a class="el" href="classtvm_1_1relay_1_1RecClosure.html">RecClosure</a> </td></tr>
-<tr id="row_1_6_168_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReduceAttrs.html" target="_self">ReduceAttrs</a></td><td class="desc">Attributes for Reduce operators </td></tr>
-<tr id="row_1_6_169_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefCreate.html" target="_self">RefCreate</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_170_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefCreateNode.html" target="_self">RefCreateNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_171_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefRead.html" target="_self">RefRead</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_172_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefReadNode.html" target="_self">RefReadNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_173_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefValue.html" target="_self">RefValue</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_174_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1RefValueObj.html" target="_self">RefValueObj</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_175_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefWrite.html" target="_self">RefWrite</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_176_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefWriteNode.html" target="_self">RefWriteNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_177_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RelayNode.html" target="_self">RelayNode</a></td><td class="desc">This is the base node container of all relay structures </td></tr>
-<tr id="row_1_6_178_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1RepeatAttrs.html" target="_self">RepeatAttrs</a></td><td class="desc">Attributes used in repeat operators </td></tr>
-<tr id="row_1_6_179_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReshapeAttrs.html" target="_self">ReshapeAttrs</a></td><td class="desc">Attributes used in reshape operators </td></tr>
-<tr id="row_1_6_180_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReshapeLikeAttrs.html" target="_self">ReshapeLikeAttrs</a></td><td class="desc">Attributes used in MXNet-style reshape_like operators </td></tr>
-<tr id="row_1_6_181_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReshapeTensorAttrs.html" target="_self">ReshapeTensorAttrs</a></td><td class="desc">Attributes for VM reshape_tensor operator </td></tr>
-<tr id="row_1_6_182_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Resize1DAttrs.html" target="_self">Resize1DAttrs</a></td><td class="desc">Attributes used in image resize1d operator </td></tr>
-<tr id="row_1_6_183_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Resize2DAttrs.html" target="_self">Resize2DAttrs</a></td><td class="desc">Attributes used in image resize2d operator </td></tr>
-<tr id="row_1_6_184_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Resize3DAttrs.html" target="_self">Resize3DAttrs</a></td><td class="desc">Attributes used in image resize3d operator </td></tr>
-<tr id="row_1_6_185_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReverseAttrs.html" target="_self">ReverseAttrs</a></td><td class="desc">Attributes used in reverse operators </td></tr>
-<tr id="row_1_6_186_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReverseSequenceAttrs.html" target="_self">ReverseSequenceAttrs</a></td><td class="desc">Attributes used in reverse_sequence operators </td></tr>
-<tr id="row_1_6_187_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ROIAlignAttrs.html" target="_self">ROIAlignAttrs</a></td><td class="desc">Attributes used in roi_align operators </td></tr>
-<tr id="row_1_6_188_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ROIPoolAttrs.html" target="_self">ROIPoolAttrs</a></td><td class="desc">Attributes used in roi_pool operators </td></tr>
-<tr id="row_1_6_189_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Runtime.html" target="_self">Runtime</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1relay_1_1RuntimeNode.html" title="Runtime information. ">RuntimeNode</a> </td></tr>
-<tr id="row_1_6_190_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RuntimeNode.html" target="_self">RuntimeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Runtime.html" title="Managed reference class to RuntimeNode. ">Runtime</a> information </td></tr>
-<tr id="row_1_6_191_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RuntimeRegEntry.html" target="_self">RuntimeRegEntry</a></td><td class="desc">Helper structure to register Runtimes </td></tr>
-<tr id="row_1_6_192_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ScanopAttrs.html" target="_self">ScanopAttrs</a></td><td class="desc">Attributes used in cumsum and cumprod operator </td></tr>
-<tr id="row_1_6_193_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ScatterAddAttrs.html" target="_self">ScatterAddAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_194_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ScatterAttrs.html" target="_self">ScatterAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_195_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ScatterNDAttrs.html" target="_self">ScatterNDAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_196_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SearchSortedAttrs.html" target="_self">SearchSortedAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_197_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SequenceMaskAttrs.html" target="_self">SequenceMaskAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_198_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ShapeFuncAttrs.html" target="_self">ShapeFuncAttrs</a></td><td class="desc">Options for the shape function operator </td></tr>
-<tr id="row_1_6_199_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ShapeOfAttrs.html" target="_self">ShapeOfAttrs</a></td><td class="desc">Attributes for ShapeOf operator </td></tr>
-<tr id="row_1_6_200_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ShapePattern.html" target="_self">ShapePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
-<tr id="row_1_6_201_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ShapePatternNode.html" target="_self">ShapePatternNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> for Shapes </td></tr>
-<tr id="row_1_6_202_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SliceLikeAttrs.html" target="_self">SliceLikeAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_203_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SlidingWindowAttrs.html" target="_self">SlidingWindowAttrs</a></td><td class="desc">Attributes used for the sliding_window operator </td></tr>
-<tr id="row_1_6_204_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SoftmaxAttrs.html" target="_self">SoftmaxAttrs</a></td><td class="desc">Attributes used in softmax operators </td></tr>
-<tr id="row_1_6_205_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SpaceToBatchNDAttrs.html" target="_self">SpaceToBatchNDAttrs</a></td><td class="desc">Attributes used in SpaceToBatchND operator </td></tr>
-<tr id="row_1_6_206_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseConv2DAttrs.html" target="_self">SparseConv2DAttrs</a></td><td class="desc">Attributes for sparse_dense operator </td></tr>
-<tr id="row_1_6_207_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseDenseAttrs.html" target="_self">SparseDenseAttrs</a></td><td class="desc">Attributes for sparse_dense operator </td></tr>
-<tr id="row_1_6_208_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseToDenseAttrs.html" target="_self">SparseToDenseAttrs</a></td><td class="desc">Attributes used in sparse_to_dense operator </td></tr>
-<tr id="row_1_6_209_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseTransposeAttrs.html" target="_self">SparseTransposeAttrs</a></td><td class="desc">Attributes for sparse_transpose operator </td></tr>
-<tr id="row_1_6_210_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SplitAttrs.html" target="_self">SplitAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_211_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SqueezeAttrs.html" target="_self">SqueezeAttrs</a></td><td class="desc">Attributes used in squeeze operators </td></tr>
-<tr id="row_1_6_212_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1StackAttrs.html" target="_self">StackAttrs</a></td><td class="desc">Attributes used in stack operators </td></tr>
-<tr id="row_1_6_213_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1StftAttrs.html" target="_self">StftAttrs</a></td><td class="desc">Attributes used in stft operator </td></tr>
-<tr id="row_1_6_214_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1StridedSliceAttrs.html" target="_self">StridedSliceAttrs</a></td><td class="desc">Attributes for StridedSlice operator </td></tr>
-<tr id="row_1_6_215_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SubPixelAttrs.html" target="_self">SubPixelAttrs</a></td><td class="desc">Attributes used in subpixel operators </td></tr>
-<tr id="row_1_6_216_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TakeAttrs.html" target="_self">TakeAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_217_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TempExpr.html" target="_self">TempExpr</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_218_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TempExprNode.html" target="_self">TempExprNode</a></td><td class="desc">Base class of the temporary expression </td></tr>
-<tr id="row_1_6_219_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ThreefryGenerateAttrs.html" target="_self">ThreefryGenerateAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_220_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TileAttrs.html" target="_self">TileAttrs</a></td><td class="desc">Attributes used in tile operators </td></tr>
-<tr id="row_1_6_221_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TopKAttrs.html" target="_self">TopKAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_222_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TransposeAttrs.html" target="_self">TransposeAttrs</a></td><td class="desc">Attributes used in transpose operators </td></tr>
-<tr id="row_1_6_223_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TriluAttrs.html" target="_self">TriluAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_224_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Tuple.html" target="_self">Tuple</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_225_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItem.html" target="_self">TupleGetItem</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_226_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItemNode.html" target="_self">TupleGetItemNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_227_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItemPattern.html" target="_self">TupleGetItemPattern</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_228_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItemPatternNode.html" target="_self">TupleGetItemPatternNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_229_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleNode.html" target="_self">TupleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Tuple.html">Tuple</a> container </td></tr>
-<tr id="row_1_6_230_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TuplePattern.html" target="_self">TuplePattern</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_231_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TuplePatternNode.html" target="_self">TuplePatternNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Tuple.html">Tuple</a> container </td></tr>
-<tr id="row_1_6_232_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TypePattern.html" target="_self">TypePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
-<tr id="row_1_6_233_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TypePatternNode.html" target="_self">TypePatternNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> for Types </td></tr>
-<tr id="row_1_6_234_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UniformAttrs.html" target="_self">UniformAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_235_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UniqueAttrs.html" target="_self">UniqueAttrs</a></td><td class="desc">Attributes used in unique operator </td></tr>
-<tr id="row_1_6_236_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UpSampling3DAttrs.html" target="_self">UpSampling3DAttrs</a></td><td class="desc">Attributes for upsampling3d operator </td></tr>
-<tr id="row_1_6_237_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UpSamplingAttrs.html" target="_self">UpSamplingAttrs</a></td><td class="desc">Attributes for upsampling operator </td></tr>
-<tr id="row_1_6_238_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1v__info.html" target="_self">v_info</a></td><td class="desc">A struct to keep info of traversed expr in ExpandDataflow function </td></tr>
-<tr id="row_1_6_239_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Var.html" target="_self">Var</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_240_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1VarianceAttrs.html" target="_self">VarianceAttrs</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_241_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1VarNode.html" target="_self">VarNode</a></td><td class="desc">Container for <a class="el" href="classtvm_1_1relay_1_1Var.html">Var</a> </td></tr>
-<tr id="row_1_6_242_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1VarPattern.html" target="_self">VarPattern</a></td><td class="desc"></td></tr>
-<tr id="row_1_6_243_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1VarPatternNode.html" target="_self">VarPatternNode</a></td><td class="desc">Container for <a class="el" href="classtvm_1_1relay_1_1Var.html">Var</a> </td></tr>
-<tr id="row_1_6_244_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1WildcardPattern.html" target="_self">WildcardPattern</a></td><td class="desc">A pattern which matches anything </td></tr>
-<tr id="row_1_6_245_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1WildcardPatternNode.html" target="_self">WildcardPatternNode</a></td><td class="desc">Wildcard <a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> </td></tr>
-<tr id="row_1_6_246_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1YoloReorgAttrs.html" target="_self">YoloReorgAttrs</a></td><td class="desc">Attributes used in yolo reorg operators </td></tr>
+<tr id="row_1_6_36_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1CompileError.html" target="_self">CompileError</a></td><td class="desc">Custom Error class to be thrown during compilation </td></tr>
+<tr id="row_1_6_37_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1CompilerAttrs.html" target="_self">CompilerAttrs</a></td><td class="desc">Options for the operators used to annotate a compiler </td></tr>
+<tr id="row_1_6_38_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ConcatenateAttrs.html" target="_self">ConcatenateAttrs</a></td><td class="desc">Attributes used in concatenate operators </td></tr>
+<tr id="row_1_6_39_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Constant.html" target="_self">Constant</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_40_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstantNode.html" target="_self">ConstantNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Constant.html">Constant</a> tensor type </td></tr>
+<tr id="row_1_6_41_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstantPattern.html" target="_self">ConstantPattern</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_42_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstantPatternNode.html" target="_self">ConstantPatternNode</a></td><td class="desc">Container for <a class="el" href="classtvm_1_1relay_1_1Constant.html">Constant</a> </td></tr>
+<tr id="row_1_6_43_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ConstructorValue.html" target="_self">ConstructorValue</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_44_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ConstructorValueObj.html" target="_self">ConstructorValueObj</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_45_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv1DAttrs.html" target="_self">Conv1DAttrs</a></td><td class="desc">Attributes used in 1D convolution operators </td></tr>
+<tr id="row_1_6_46_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv1DTransposeAttrs.html" target="_self">Conv1DTransposeAttrs</a></td><td class="desc">Attributes used in 1D transposed convolution operator </td></tr>
+<tr id="row_1_6_47_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv2DAttrs.html" target="_self">Conv2DAttrs</a></td><td class="desc">Attributes used in convolution operators </td></tr>
+<tr id="row_1_6_48_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv2DTransposeAttrs.html" target="_self">Conv2DTransposeAttrs</a></td><td class="desc">Attributes used in transposed convolution operator </td></tr>
+<tr id="row_1_6_49_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradAttrs.html" target="_self">Conv2DWinogradAttrs</a></td><td class="desc">Attributes used in convolution operators with winograd algorithm </td></tr>
+<tr id="row_1_6_50_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv2DWinogradNNPACKWeightTransformAttrs.html" target="_self">Conv2DWinogradNNPACKWeightTransformAttrs</a></td><td class="desc">Attributes used in winograd weight transformation operators </td></tr>
+<tr id="row_1_6_51_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv3DAttrs.html" target="_self">Conv3DAttrs</a></td><td class="desc">Attributes used in convolution operators </td></tr>
+<tr id="row_1_6_52_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv3DTransposeAttrs.html" target="_self">Conv3DTransposeAttrs</a></td><td class="desc">Attributes used in transposed convolution operator </td></tr>
+<tr id="row_1_6_53_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Conv3DWinogradAttrs.html" target="_self">Conv3DWinogradAttrs</a></td><td class="desc">Attributes used in 3d winograd convolution operators </td></tr>
+<tr id="row_1_6_54_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ConvGemmWeightTransformAttrs.html" target="_self">ConvGemmWeightTransformAttrs</a></td><td class="desc">Attributes used in gemm weight transformation operators </td></tr>
+<tr id="row_1_6_55_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ConvWinogradWeightTransformAttrs.html" target="_self">ConvWinogradWeightTransformAttrs</a></td><td class="desc">Attributes used in winograd weight transformation operators </td></tr>
+<tr id="row_1_6_56_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1CorrelationAttrs.html" target="_self">CorrelationAttrs</a></td><td class="desc">Attributes used in correlation operators </td></tr>
+<tr id="row_1_6_57_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1CropAndResizeAttrs.html" target="_self">CropAndResizeAttrs</a></td><td class="desc">Attributes used in image crop_and_resize operator </td></tr>
+<tr id="row_1_6_58_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DataTypePattern.html" target="_self">DataTypePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
+<tr id="row_1_6_59_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DataTypePatternNode.html" target="_self">DataTypePatternNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> for Types </td></tr>
+<tr id="row_1_6_60_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DebugAttrs.html" target="_self">DebugAttrs</a></td><td class="desc">Options for the debug operators </td></tr>
+<tr id="row_1_6_61_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DeformableConv2DAttrs.html" target="_self">DeformableConv2DAttrs</a></td><td class="desc">Attributes for DeformableConv2D operator </td></tr>
+<tr id="row_1_6_62_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DenseAttrs.html" target="_self">DenseAttrs</a></td><td class="desc">Attributes for dense operator </td></tr>
+<tr id="row_1_6_63_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DensePackAttrs.html" target="_self">DensePackAttrs</a></td><td class="desc">Attributes for dense_pack operator </td></tr>
+<tr id="row_1_6_64_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DeviceCopyAttrs.html" target="_self">DeviceCopyAttrs</a></td><td class="desc">Options for the device copy operators </td></tr>
+<tr id="row_1_6_65_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPattern.html" target="_self">DFPattern</a></td><td class="desc">Managed reference to dataflow patterns </td></tr>
+<tr id="row_1_6_66_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternCallback.html" target="_self">DFPatternCallback</a></td><td class="desc">Managed reference to dataflow pattern callbacks </td></tr>
+<tr id="row_1_6_67_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternCallbackNode.html" target="_self">DFPatternCallbackNode</a></td><td class="desc">Base type of all dataflow pattern callbacks </td></tr>
+<tr id="row_1_6_68_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor.html" target="_self">DFPatternFunctor</a></td><td class="desc">A dynamical functor that dispatches on in the first <a class="el" href="classtvm_1_1relay_1_1DFPattern.html" title="Managed reference to dataflow patterns. ">DFPattern</a> argument </ [...]
+<tr id="row_1_6_69_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor_3_01R_07const_01DFPattern_01_6n_00_01Args_8_8_8_08_4.html" target="_self">DFPatternFunctor&lt; R(const DFPattern &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_70_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternNode.html" target="_self">DFPatternNode</a></td><td class="desc">Base type of all dataflow patterns </td></tr>
+<tr id="row_1_6_71_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DFPatternVisitor.html" target="_self">DFPatternVisitor</a></td><td class="desc">A simple visitor wrapper around <a class="el" href="classtvm_1_1relay_1_1DFPatternFunctor.html" title="A dynamical functor that dispatches on in the first DFPattern argument. ">DFPatt [...]
+<tr id="row_1_6_72_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DilateAttrs.html" target="_self">DilateAttrs</a></td><td class="desc">Attributes used in dilate operator </td></tr>
+<tr id="row_1_6_73_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Dilation2DAttrs.html" target="_self">Dilation2DAttrs</a></td><td class="desc">Attributes used in dilation operators </td></tr>
+<tr id="row_1_6_74_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DominatorPattern.html" target="_self">DominatorPattern</a></td><td class="desc">A pattern which matches a variable length dominator path </td></tr>
+<tr id="row_1_6_75_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1DominatorPatternNode.html" target="_self">DominatorPatternNode</a></td><td class="desc">Dominated Graph <a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> <a class="el" href="cla [...]
+<tr id="row_1_6_76_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DropoutAttrs.html" target="_self">DropoutAttrs</a></td><td class="desc">Attributes used in dropout operator </td></tr>
+<tr id="row_1_6_77_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1DynExpandDimsAttrs.html" target="_self">DynExpandDimsAttrs</a></td><td class="desc">Attributes used in dynamic expand_dims operators </td></tr>
+<tr id="row_1_6_78_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1EinsumAttrs.html" target="_self">EinsumAttrs</a></td><td class="desc">Attributes used in einsum operator </td></tr>
+<tr id="row_1_6_79_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ErrorBuilder.html" target="_self">ErrorBuilder</a></td><td class="desc">A wrapper around std::stringstream to build error </td></tr>
+<tr id="row_1_6_80_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ErrorReporter.html" target="_self">ErrorReporter</a></td><td class="desc">An abstraction around how errors are stored and reported. Designed to be opaque to users, so we can support a robust and simpler error reporting mode, as well as a more complex mode </td></tr>
+<tr id="row_1_6_81_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Executor.html" target="_self">Executor</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1relay_1_1ExecutorNode.html" title="Executor information. ">ExecutorNode</a> </td></tr>
+<tr id="row_1_6_82_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExecutorNode.html" target="_self">ExecutorNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Executor.html" title="Managed reference class to ExecutorNode. ">Executor</a> information </td></tr>
+<tr id="row_1_6_83_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExecutorRegEntry.html" target="_self">ExecutorRegEntry</a></td><td class="desc">Helper structure to register Executors </td></tr>
+<tr id="row_1_6_84_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ExpandDimsAttrs.html" target="_self">ExpandDimsAttrs</a></td><td class="desc">Attributes used in expand_dims operators </td></tr>
+<tr id="row_1_6_85_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html" target="_self">ExprFunctor</a></td><td class="desc">A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerful Visitor, since it allows you to define function signatures of Visit <a class="el" href="cl [...]
+<tr id="row_1_6_86_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprFunctor_3_01R_07const_01Expr_01_6n_00_01Args_8_8_8_08_4.html" target="_self">ExprFunctor&lt; R(const Expr &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_87_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprMutator.html" target="_self">ExprMutator</a></td><td class="desc">A wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html" title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerfu...">Expr [...]
+<tr id="row_1_6_88_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprPattern.html" target="_self">ExprPattern</a></td><td class="desc">A pattern which matches a literal expression </td></tr>
+<tr id="row_1_6_89_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprPatternNode.html" target="_self">ExprPatternNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> for Relay Expression </td></tr>
+<tr id="row_1_6_90_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprRewriter.html" target="_self">ExprRewriter</a></td><td class="desc">A non-iterating Expression Rewriter </td></tr>
+<tr id="row_1_6_91_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html" target="_self">ExprVisitor</a></td><td class="desc">A simple visitor wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html" title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more p [...]
+<tr id="row_1_6_92_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FeatureSet.html" target="_self">FeatureSet</a></td><td class="desc">A finite set of Feature </td></tr>
+<tr id="row_1_6_93_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1FIFOBufferAttrs.html" target="_self">FIFOBufferAttrs</a></td><td class="desc">Attributes for FIFO buffer operator </td></tr>
+<tr id="row_1_6_94_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1FixedPointMultiplyAttrs.html" target="_self">FixedPointMultiplyAttrs</a></td><td class="desc">Attributes for FixedPointMultiply operator </td></tr>
+<tr id="row_1_6_95_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1FixedPointMultiplyPerAxisAttrs.html" target="_self">FixedPointMultiplyPerAxisAttrs</a></td><td class="desc">Attributes for per channel/per axes FixedPointMultiply operator </td></tr>
+<tr id="row_1_6_96_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Function.html" target="_self">Function</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" title="Relay Function container. ">FunctionNode</a> </td></tr>
+<tr id="row_1_6_97_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" target="_self">FunctionNode</a></td><td class="desc">Relay <a class="el" href="classtvm_1_1relay_1_1Function.html" title="Managed reference to FunctionNode. ">Function</a> container </td></tr>
+<tr id="row_1_6_98_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FunctionPattern.html" target="_self">FunctionPattern</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1relay_1_1FunctionNode.html" title="Relay Function container. ">FunctionNode</a> </td></tr>
+<tr id="row_1_6_99_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1FunctionPatternNode.html" target="_self">FunctionPatternNode</a></td><td class="desc">Relay <a class="el" href="classtvm_1_1relay_1_1Function.html" title="Managed reference to FunctionNode. ">Function</a> container </td></tr>
+<tr id="row_1_6_100_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GatherAttrs.html" target="_self">GatherAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_101_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GatherNDAttrs.html" target="_self">GatherNDAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_102_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GetValidCountsAttrs.html" target="_self">GetValidCountsAttrs</a></td><td class="desc">Attributes used in get_valid_counts operator </td></tr>
+<tr id="row_1_6_103_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GlobalPool2DAttrs.html" target="_self">GlobalPool2DAttrs</a></td><td class="desc">Attributes for global pool operator </td></tr>
+<tr id="row_1_6_104_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GridSampleAttrs.html" target="_self">GridSampleAttrs</a></td><td class="desc">Attributes used in image grid_sample operator </td></tr>
+<tr id="row_1_6_105_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1GroupNormAttrs.html" target="_self">GroupNormAttrs</a></td><td class="desc">Attributes used in group_norm operator </td></tr>
+<tr id="row_1_6_106_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Id.html" target="_self">Id</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_107_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IdNode.html" target="_self">IdNode</a></td><td class="desc">The unique identifier of variables </td></tr>
+<tr id="row_1_6_108_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1If.html" target="_self">If</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_109_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IfNode.html" target="_self">IfNode</a></td><td class="desc">Container of <a class="el" href="classtvm_1_1relay_1_1If.html">If</a> </td></tr>
+<tr id="row_1_6_110_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IfPattern.html" target="_self">IfPattern</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_111_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1IfPatternNode.html" target="_self">IfPatternNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_112_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1InitOpAttrs.html" target="_self">InitOpAttrs</a></td><td class="desc">Attributes that specify a tensor </td></tr>
+<tr id="row_1_6_113_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1InstanceNormAttrs.html" target="_self">InstanceNormAttrs</a></td><td class="desc">Attributes used in instance_norm operator </td></tr>
+<tr id="row_1_6_114_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1InterpreterClosure.html" target="_self">InterpreterClosure</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_115_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1InterpreterClosureObj.html" target="_self">InterpreterClosureObj</a></td><td class="desc">The container type of Closures used by the interpreter </td></tr>
+<tr id="row_1_6_116_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1L2NormalizeAttrs.html" target="_self">L2NormalizeAttrs</a></td><td class="desc">Attributes for L2Normalize operator </td></tr>
+<tr id="row_1_6_117_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LayerNormAttrs.html" target="_self">LayerNormAttrs</a></td><td class="desc">Attributes used in layer_norm operator </td></tr>
+<tr id="row_1_6_118_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LayoutTransformAttrs.html" target="_self">LayoutTransformAttrs</a></td><td class="desc">Attributes for LayoutTransform operator </td></tr>
+<tr id="row_1_6_119_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LeakyReluAttrs.html" target="_self">LeakyReluAttrs</a></td><td class="desc">Attributes for leaky relu operator </td></tr>
+<tr id="row_1_6_120_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Let.html" target="_self">Let</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_121_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1LetNode.html" target="_self">LetNode</a></td><td class="desc">A binding of a sub-network </td></tr>
+<tr id="row_1_6_122_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1LetPattern.html" target="_self">LetPattern</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Let.html">Let</a> binding that binds a local var </td></tr>
+<tr id="row_1_6_123_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1LetPatternNode.html" target="_self">LetPatternNode</a></td><td class="desc">A binding of a sub-network </td></tr>
+<tr id="row_1_6_124_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1LRNAttrs.html" target="_self">LRNAttrs</a></td><td class="desc">Attributes for LRN operator </td></tr>
+<tr id="row_1_6_125_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Match.html" target="_self">Match</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_126_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MatchNode.html" target="_self">MatchNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Match.html">Match</a> container node </td></tr>
+<tr id="row_1_6_127_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MatmulAttrs.html" target="_self">MatmulAttrs</a></td><td class="desc">Attributes for matmul operator </td></tr>
+<tr id="row_1_6_128_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MatrixSetDiagAttrs.html" target="_self">MatrixSetDiagAttrs</a></td><td class="desc">Attributes used in matrix_set_diag operator </td></tr>
+<tr id="row_1_6_129_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool1DAttrs.html" target="_self">MaxPool1DAttrs</a></td><td class="desc">Attributes for 1D max pool operator </td></tr>
+<tr id="row_1_6_130_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool2DAttrs.html" target="_self">MaxPool2DAttrs</a></td><td class="desc">Attributes for max pool operator </td></tr>
+<tr id="row_1_6_131_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MaxPool3DAttrs.html" target="_self">MaxPool3DAttrs</a></td><td class="desc">Attributes for 3D max pool operator </td></tr>
+<tr id="row_1_6_132_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MeshgridAttrs.html" target="_self">MeshgridAttrs</a></td><td class="desc">Attributes used in meshgrid operators </td></tr>
+<tr id="row_1_6_133_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MetaScheduleLayoutTransformAttrs.html" target="_self">MetaScheduleLayoutTransformAttrs</a></td><td class="desc">Attributes for MetaScheduleLayoutTransform operator </td></tr>
+<tr id="row_1_6_134_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MirrorPadAttrs.html" target="_self">MirrorPadAttrs</a></td><td class="desc">Attributes used for the MirrorPadding operator </td></tr>
+<tr id="row_1_6_135_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MixedModeMutator.html" target="_self">MixedModeMutator</a></td><td class="desc">Non-recursive DFS Graph Traversal for Custom Rewriting Passes </td></tr>
+<tr id="row_1_6_136_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1MixedModeVisitor.html" target="_self">MixedModeVisitor</a></td><td class="desc">A wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprVisitor.html" title="A simple visitor wrapper around ExprFunctor. Recursively visit the content. ">ExprVisitor</a> which [...]
+<tr id="row_1_6_137_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MultiBoxPriorAttrs.html" target="_self">MultiBoxPriorAttrs</a></td><td class="desc">Attributes used in multibox_prior operators </td></tr>
+<tr id="row_1_6_138_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MultiBoxTransformLocAttrs.html" target="_self">MultiBoxTransformLocAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_139_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1MultinomialAttrs.html" target="_self">MultinomialAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_140_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NdarraySizeAttrs.html" target="_self">NdarraySizeAttrs</a></td><td class="desc">Attributes for ndarray_size operator </td></tr>
+<tr id="row_1_6_141_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NLLLossAttrs.html" target="_self">NLLLossAttrs</a></td><td class="desc">Attributes used in NLLLoss operator </td></tr>
+<tr id="row_1_6_142_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NonMaximumSuppressionAttrs.html" target="_self">NonMaximumSuppressionAttrs</a></td><td class="desc">Attributes used in non_maximum_suppression operator </td></tr>
+<tr id="row_1_6_143_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1NormalAttrs.html" target="_self">NormalAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_144_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1OnDeviceAttrs.html" target="_self">OnDeviceAttrs</a></td><td class="desc">Attributes for the "on_device" annotation (ie operator) </td></tr>
+<tr id="row_1_6_145_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1OneHotAttrs.html" target="_self">OneHotAttrs</a></td><td class="desc">Attributes used in one-hot operator </td></tr>
+<tr id="row_1_6_146_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpImplementation.html" target="_self">OpImplementation</a></td><td class="desc">Operator implementation class </td></tr>
+<tr id="row_1_6_147_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpImplementationNode.html" target="_self">OpImplementationNode</a></td><td class="desc">Operator implementation that includes compute and schedule function </td></tr>
+<tr id="row_1_6_148_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpSpecialization.html" target="_self">OpSpecialization</a></td><td class="desc">Operator specialization class </td></tr>
+<tr id="row_1_6_149_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpSpecializationNode.html" target="_self">OpSpecializationNode</a></td><td class="desc">Specialized implementations for operators under certain conditions </td></tr>
+<tr id="row_1_6_150_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpStrategy.html" target="_self">OpStrategy</a></td><td class="desc">Operator strategy class </td></tr>
+<tr id="row_1_6_151_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1OpStrategyNode.html" target="_self">OpStrategyNode</a></td><td class="desc">Operator strategy to choose implementation </td></tr>
+<tr id="row_1_6_152_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1PadAttrs.html" target="_self">PadAttrs</a></td><td class="desc">Attributes used for the padding operator </td></tr>
+<tr id="row_1_6_153_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Pattern.html" target="_self">Pattern</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> is the base type for an ADT match pattern in Relay </td></tr>
+<tr id="row_1_6_154_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternConstructor.html" target="_self">PatternConstructor</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_155_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternConstructorNode.html" target="_self">PatternConstructorNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
+<tr id="row_1_6_156_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html" target="_self">PatternFunctor</a></td><td class="desc">A dynamical functor on ADT patterns that dispatches on its first argument. You can use this as a more powerful visitor, since it allows you to define the types of further arguments to Vi [...]
+<tr id="row_1_6_157_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternFunctor_3_01R_07const_01Pattern_01_6n_00_01Args_8_8_8_08_4.html" target="_self">PatternFunctor&lt; R(const Pattern &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_158_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternMutator.html" target="_self">PatternMutator</a></td><td class="desc">A wrapper around <a class="el" href="classtvm_1_1relay_1_1ExprFunctor.html" title="A dynamical functor that dispatches on in the first Expr argument. You can use this as a more powerfu.. [...]
+<tr id="row_1_6_159_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternNode.html" target="_self">PatternNode</a></td><td class="desc">Base type for declaring relay pattern </td></tr>
+<tr id="row_1_6_160_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternTuple.html" target="_self">PatternTuple</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_161_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternTupleNode.html" target="_self">PatternTupleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
+<tr id="row_1_6_162_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVar.html" target="_self">PatternVar</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_163_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVarNode.html" target="_self">PatternVarNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternVar.html">PatternVar</a> container node </td></tr>
+<tr id="row_1_6_164_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternVisitor.html" target="_self">PatternVisitor</a></td><td class="desc">A simple visitor wrapper around <a class="el" href="classtvm_1_1relay_1_1PatternFunctor.html" title="A dynamical functor on ADT patterns that dispatches on its first argument. You can us [...]
+<tr id="row_1_6_165_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html" target="_self">PatternWildcard</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_166_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1PatternWildcardNode.html" target="_self">PatternWildcardNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1PatternWildcard.html">PatternWildcard</a> container node </td></tr>
+<tr id="row_1_6_167_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1PReluAttrs.html" target="_self">PReluAttrs</a></td><td class="desc">Attributes for prelu operator </td></tr>
+<tr id="row_1_6_168_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ProposalAttrs.html" target="_self">ProposalAttrs</a></td><td class="desc">Attributes used in proposal operators </td></tr>
+<tr id="row_1_6_169_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RecClosure.html" target="_self">RecClosure</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_170_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RecClosureObj.html" target="_self">RecClosureObj</a></td><td class="desc">The container type of <a class="el" href="classtvm_1_1relay_1_1RecClosure.html">RecClosure</a> </td></tr>
+<tr id="row_1_6_171_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReduceAttrs.html" target="_self">ReduceAttrs</a></td><td class="desc">Attributes for Reduce operators </td></tr>
+<tr id="row_1_6_172_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefCreate.html" target="_self">RefCreate</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_173_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefCreateNode.html" target="_self">RefCreateNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_174_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefRead.html" target="_self">RefRead</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_175_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefReadNode.html" target="_self">RefReadNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_176_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefValue.html" target="_self">RefValue</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_177_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1RefValueObj.html" target="_self">RefValueObj</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_178_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefWrite.html" target="_self">RefWrite</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_179_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RefWriteNode.html" target="_self">RefWriteNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_180_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RelayNode.html" target="_self">RelayNode</a></td><td class="desc">This is the base node container of all relay structures </td></tr>
+<tr id="row_1_6_181_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1RepeatAttrs.html" target="_self">RepeatAttrs</a></td><td class="desc">Attributes used in repeat operators </td></tr>
+<tr id="row_1_6_182_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReshapeAttrs.html" target="_self">ReshapeAttrs</a></td><td class="desc">Attributes used in reshape operators </td></tr>
+<tr id="row_1_6_183_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReshapeLikeAttrs.html" target="_self">ReshapeLikeAttrs</a></td><td class="desc">Attributes used in MXNet-style reshape_like operators </td></tr>
+<tr id="row_1_6_184_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReshapeTensorAttrs.html" target="_self">ReshapeTensorAttrs</a></td><td class="desc">Attributes for VM reshape_tensor operator </td></tr>
+<tr id="row_1_6_185_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Resize1DAttrs.html" target="_self">Resize1DAttrs</a></td><td class="desc">Attributes used in image resize1d operator </td></tr>
+<tr id="row_1_6_186_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Resize2DAttrs.html" target="_self">Resize2DAttrs</a></td><td class="desc">Attributes used in image resize2d operator </td></tr>
+<tr id="row_1_6_187_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1Resize3DAttrs.html" target="_self">Resize3DAttrs</a></td><td class="desc">Attributes used in image resize3d operator </td></tr>
+<tr id="row_1_6_188_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReverseAttrs.html" target="_self">ReverseAttrs</a></td><td class="desc">Attributes used in reverse operators </td></tr>
+<tr id="row_1_6_189_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ReverseSequenceAttrs.html" target="_self">ReverseSequenceAttrs</a></td><td class="desc">Attributes used in reverse_sequence operators </td></tr>
+<tr id="row_1_6_190_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ROIAlignAttrs.html" target="_self">ROIAlignAttrs</a></td><td class="desc">Attributes used in roi_align operators </td></tr>
+<tr id="row_1_6_191_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ROIPoolAttrs.html" target="_self">ROIPoolAttrs</a></td><td class="desc">Attributes used in roi_pool operators </td></tr>
+<tr id="row_1_6_192_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Runtime.html" target="_self">Runtime</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1relay_1_1RuntimeNode.html" title="Runtime information. ">RuntimeNode</a> </td></tr>
+<tr id="row_1_6_193_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RuntimeNode.html" target="_self">RuntimeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Runtime.html" title="Managed reference class to RuntimeNode. ">Runtime</a> information </td></tr>
+<tr id="row_1_6_194_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1RuntimeRegEntry.html" target="_self">RuntimeRegEntry</a></td><td class="desc">Helper structure to register Runtimes </td></tr>
+<tr id="row_1_6_195_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ScanopAttrs.html" target="_self">ScanopAttrs</a></td><td class="desc">Attributes used in cumsum and cumprod operator </td></tr>
+<tr id="row_1_6_196_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ScatterAddAttrs.html" target="_self">ScatterAddAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_197_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ScatterAttrs.html" target="_self">ScatterAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_198_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ScatterNDAttrs.html" target="_self">ScatterNDAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_199_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SearchSortedAttrs.html" target="_self">SearchSortedAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_200_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SequenceMaskAttrs.html" target="_self">SequenceMaskAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_201_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ShapeFuncAttrs.html" target="_self">ShapeFuncAttrs</a></td><td class="desc">Options for the shape function operator </td></tr>
+<tr id="row_1_6_202_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ShapeOfAttrs.html" target="_self">ShapeOfAttrs</a></td><td class="desc">Attributes for ShapeOf operator </td></tr>
+<tr id="row_1_6_203_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ShapePattern.html" target="_self">ShapePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
+<tr id="row_1_6_204_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1ShapePatternNode.html" target="_self">ShapePatternNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> for Shapes </td></tr>
+<tr id="row_1_6_205_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SliceLikeAttrs.html" target="_self">SliceLikeAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_206_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SlidingWindowAttrs.html" target="_self">SlidingWindowAttrs</a></td><td class="desc">Attributes used for the sliding_window operator </td></tr>
+<tr id="row_1_6_207_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SoftmaxAttrs.html" target="_self">SoftmaxAttrs</a></td><td class="desc">Attributes used in softmax operators </td></tr>
+<tr id="row_1_6_208_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SpaceToBatchNDAttrs.html" target="_self">SpaceToBatchNDAttrs</a></td><td class="desc">Attributes used in SpaceToBatchND operator </td></tr>
+<tr id="row_1_6_209_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseConv2DAttrs.html" target="_self">SparseConv2DAttrs</a></td><td class="desc">Attributes for sparse_dense operator </td></tr>
+<tr id="row_1_6_210_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseDenseAttrs.html" target="_self">SparseDenseAttrs</a></td><td class="desc">Attributes for sparse_dense operator </td></tr>
+<tr id="row_1_6_211_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseToDenseAttrs.html" target="_self">SparseToDenseAttrs</a></td><td class="desc">Attributes used in sparse_to_dense operator </td></tr>
+<tr id="row_1_6_212_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SparseTransposeAttrs.html" target="_self">SparseTransposeAttrs</a></td><td class="desc">Attributes for sparse_transpose operator </td></tr>
+<tr id="row_1_6_213_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SplitAttrs.html" target="_self">SplitAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_214_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SqueezeAttrs.html" target="_self">SqueezeAttrs</a></td><td class="desc">Attributes used in squeeze operators </td></tr>
+<tr id="row_1_6_215_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1StackAttrs.html" target="_self">StackAttrs</a></td><td class="desc">Attributes used in stack operators </td></tr>
+<tr id="row_1_6_216_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1StftAttrs.html" target="_self">StftAttrs</a></td><td class="desc">Attributes used in stft operator </td></tr>
+<tr id="row_1_6_217_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1StridedSliceAttrs.html" target="_self">StridedSliceAttrs</a></td><td class="desc">Attributes for StridedSlice operator </td></tr>
+<tr id="row_1_6_218_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1SubPixelAttrs.html" target="_self">SubPixelAttrs</a></td><td class="desc">Attributes used in subpixel operators </td></tr>
+<tr id="row_1_6_219_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TakeAttrs.html" target="_self">TakeAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_220_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TempExpr.html" target="_self">TempExpr</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_221_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TempExprNode.html" target="_self">TempExprNode</a></td><td class="desc">Base class of the temporary expression </td></tr>
+<tr id="row_1_6_222_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1ThreefryGenerateAttrs.html" target="_self">ThreefryGenerateAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_223_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TileAttrs.html" target="_self">TileAttrs</a></td><td class="desc">Attributes used in tile operators </td></tr>
+<tr id="row_1_6_224_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TopKAttrs.html" target="_self">TopKAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_225_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TransposeAttrs.html" target="_self">TransposeAttrs</a></td><td class="desc">Attributes used in transpose operators </td></tr>
+<tr id="row_1_6_226_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1TriluAttrs.html" target="_self">TriluAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_227_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Tuple.html" target="_self">Tuple</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_228_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItem.html" target="_self">TupleGetItem</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_229_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItemNode.html" target="_self">TupleGetItemNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_230_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItemPattern.html" target="_self">TupleGetItemPattern</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_231_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleGetItemPatternNode.html" target="_self">TupleGetItemPatternNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_232_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TupleNode.html" target="_self">TupleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Tuple.html">Tuple</a> container </td></tr>
+<tr id="row_1_6_233_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TuplePattern.html" target="_self">TuplePattern</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_234_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TuplePatternNode.html" target="_self">TuplePatternNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Tuple.html">Tuple</a> container </td></tr>
+<tr id="row_1_6_235_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TypePattern.html" target="_self">TypePattern</a></td><td class="desc">A pattern which matches a type in another pattern </td></tr>
+<tr id="row_1_6_236_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1TypePatternNode.html" target="_self">TypePatternNode</a></td><td class="desc"><a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> for Types </td></tr>
+<tr id="row_1_6_237_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UniformAttrs.html" target="_self">UniformAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_238_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UniqueAttrs.html" target="_self">UniqueAttrs</a></td><td class="desc">Attributes used in unique operator </td></tr>
+<tr id="row_1_6_239_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UpSampling3DAttrs.html" target="_self">UpSampling3DAttrs</a></td><td class="desc">Attributes for upsampling3d operator </td></tr>
+<tr id="row_1_6_240_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1UpSamplingAttrs.html" target="_self">UpSamplingAttrs</a></td><td class="desc">Attributes for upsampling operator </td></tr>
+<tr id="row_1_6_241_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1v__info.html" target="_self">v_info</a></td><td class="desc">A struct to keep info of traversed expr in ExpandDataflow function </td></tr>
+<tr id="row_1_6_242_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1Var.html" target="_self">Var</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_243_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1VarianceAttrs.html" target="_self">VarianceAttrs</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_244_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1VarNode.html" target="_self">VarNode</a></td><td class="desc">Container for <a class="el" href="classtvm_1_1relay_1_1Var.html">Var</a> </td></tr>
+<tr id="row_1_6_245_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1VarPattern.html" target="_self">VarPattern</a></td><td class="desc"></td></tr>
+<tr id="row_1_6_246_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1VarPatternNode.html" target="_self">VarPatternNode</a></td><td class="desc">Container for <a class="el" href="classtvm_1_1relay_1_1Var.html">Var</a> </td></tr>
+<tr id="row_1_6_247_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1WildcardPattern.html" target="_self">WildcardPattern</a></td><td class="desc">A pattern which matches anything </td></tr>
+<tr id="row_1_6_248_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1relay_1_1WildcardPatternNode.html" target="_self">WildcardPatternNode</a></td><td class="desc">Wildcard <a class="el" href="classtvm_1_1relay_1_1Pattern.html" title="Pattern is the base type for an ADT match pattern in Relay. ">Pattern</a> </td></tr>
+<tr id="row_1_6_249_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1relay_1_1YoloReorgAttrs.html" target="_self">YoloReorgAttrs</a></td><td class="desc">Attributes used in yolo reorg operators </td></tr>
 <tr id="row_1_7_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_7_" class="arrow" onclick="toggleFolder('1_7_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1runtime.html" target="_self">runtime</a></td><td class="desc"></td></tr>
 <tr id="row_1_7_0_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span id="arr_1_7_0_" class="arrow" onclick="toggleFolder('1_7_0_')">&#9658;</span><span class="icona"><span class="icon">N</span></span><a class="el" href="namespacetvm_1_1runtime_1_1metadata.html" target="_self">metadata</a></td><td class="desc"></td></tr>
 <tr id="row_1_7_0_0_" class="even" style="display:none;"><td class="entry"><span style="width:64px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1runtime_1_1metadata_1_1ArrayAccessor.html" target="_self">ArrayAccessor</a></td><td class="desc">A span-like class which permits access to <a class="el" href="classtvm_1_1runtime_1_1Array.html" title="Array, container representing a contiguous sequence of ObjectRefs. ">A [...]
@@ -1056,149 +1059,146 @@ $(function() {
 <tr id="row_1_38_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Bool.html" target="_self">Bool</a></td><td class="desc">Boolean constant </td></tr>
 <tr id="row_1_39_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1CompilationConfig.html" target="_self">CompilationConfig</a></td><td class="desc">Managed reference class to <code><a class="el" href="classtvm_1_1CompilationConfig.html" title="Managed reference class to CompilationConfig. ">CompilationConfig</a></code> </td></tr>
 <tr id="row_1_40_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1CompilationConfigNode.html" target="_self">CompilationConfigNode</a></td><td class="desc">Gathers the <code>Targets</code> and distinguished <code>VirtualDevices</code> in canonical form needed to compile a Relay module for execution over possibly heterogeneous devices. Cen [...]
-<tr id="row_1_41_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1CompileError.html" target="_self">CompileError</a></td><td class="desc">Custom Error class to be thrown during compilation </td></tr>
-<tr id="row_1_42_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantInfo.html" target="_self">ConstantInfo</a></td><td class="desc"></td></tr>
-<tr id="row_1_43_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1ConstantInfoNode.html" target="_self">ConstantInfoNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_44_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantMemoryPools.html" target="_self">ConstantMemoryPools</a></td><td class="desc"></td></tr>
-<tr id="row_1_45_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1ConstantMemoryPoolsNode.html" target="_self">ConstantMemoryPoolsNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_46_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantPoolInfo.html" target="_self">ConstantPoolInfo</a></td><td class="desc"></td></tr>
-<tr id="row_1_47_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1ConstantPoolInfoNode.html" target="_self">ConstantPoolInfoNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_48_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Constructor.html" target="_self">Constructor</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1ConstructorNode.html" title="ADT constructor. Constructors compare by pointer equality. ">ConstructorNode</a> </td></tr>
-<tr id="row_1_49_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstructorNode.html" target="_self">ConstructorNode</a></td><td class="desc">ADT constructor. Constructors compare by pointer equality </td></tr>
-<tr id="row_1_50_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Diagnostic.html" target="_self">Diagnostic</a></td><td class="desc"></td></tr>
-<tr id="row_1_51_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticBuilder.html" target="_self">DiagnosticBuilder</a></td><td class="desc">A wrapper around std::stringstream to build a diagnostic </td></tr>
-<tr id="row_1_52_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticContext.html" target="_self">DiagnosticContext</a></td><td class="desc"></td></tr>
-<tr id="row_1_53_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticContextNode.html" target="_self">DiagnosticContextNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_54_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticNode.html" target="_self">DiagnosticNode</a></td><td class="desc">A compiler diagnostic message </td></tr>
-<tr id="row_1_55_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticRenderer.html" target="_self">DiagnosticRenderer</a></td><td class="desc"></td></tr>
-<tr id="row_1_56_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticRendererNode.html" target="_self">DiagnosticRendererNode</a></td><td class="desc">Display diagnostics in a given display format </td></tr>
-<tr id="row_1_57_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DictAttrs.html" target="_self">DictAttrs</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1DictAttrsNode.html" title="Specialized attribute type that is backed by a map. The DictAttrsNode implements the Attrs behavior...">DictAttrsNode</a> </td></tr>
-<tr id="row_1_58_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DictAttrsNode.html" target="_self">DictAttrsNode</a></td><td class="desc">Specialized attribute type that is backed by a map. The <a class="el" href="classtvm_1_1DictAttrsNode.html" title="Specialized attribute type that is backed by a map. The DictAttrsNode implements the  [...]
-<tr id="row_1_59_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1EnvFunc.html" target="_self">EnvFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1EnvFuncNode.html" title="A serializable function backed by TVM&#39;s global environment. ">EnvFuncNode</a> </td></tr>
-<tr id="row_1_60_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1EnvFuncNode.html" target="_self">EnvFuncNode</a></td><td class="desc">A serializable function backed by TVM's global environment </td></tr>
-<tr id="row_1_61_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1ErrorBuilder.html" target="_self">ErrorBuilder</a></td><td class="desc">A wrapper around std::stringstream to build error </td></tr>
-<tr id="row_1_62_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ErrorReporter.html" target="_self">ErrorReporter</a></td><td class="desc">An abstraction around how errors are stored and reported. Designed to be opaque to users, so we can support a robust and simpler error reporting mode, as well as a more complex mode </td></tr>
-<tr id="row_1_63_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FloatImm.html" target="_self">FloatImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1FloatImmNode.html" title="Constant floating point literals in the program. ">FloatImmNode</a> </td></tr>
-<tr id="row_1_64_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FloatImmNode.html" target="_self">FloatImmNode</a></td><td class="desc">Constant floating point literals in the program </td></tr>
-<tr id="row_1_65_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FuncType.html" target="_self">FuncType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1FuncTypeNode.html" title="Function type. ">FuncTypeNode</a> </td></tr>
-<tr id="row_1_66_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FuncTypeNode.html" target="_self">FuncTypeNode</a></td><td class="desc">Function type </td></tr>
-<tr id="row_1_67_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GenericFunc.html" target="_self">GenericFunc</a></td><td class="desc">Generic function that can be specialized on a per-target basis </td></tr>
-<tr id="row_1_68_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GenericFuncNode.html" target="_self">GenericFuncNode</a></td><td class="desc">Represents a generic function that can be specialized on a per-target basis </td></tr>
-<tr id="row_1_69_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalTypeVar.html" target="_self">GlobalTypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalTypeVarNode.html" title="A global type variable that is used for defining new types or type aliases. ">GlobalTypeVarNode</a> </td></tr>
-<tr id="row_1_70_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalTypeVarNode.html" target="_self">GlobalTypeVarNode</a></td><td class="desc">A global type variable that is used for defining new types or type aliases </td></tr>
-<tr id="row_1_71_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVar.html" target="_self">GlobalVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalVarNode.html" title="Global variable that lives in the top-level module. ">GlobalVarNode</a> </td></tr>
-<tr id="row_1_72_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarNode.html" target="_self">GlobalVarNode</a></td><td class="desc">Global variable that lives in the top-level module </td></tr>
-<tr id="row_1_73_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarSupply.html" target="_self">GlobalVarSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1GlobalVarSupplyNode.html" title="GlobalVarSupply can be used to generate unique GlobalVars. ">GlobalVarSupplyNode</a> </td></tr>
-<tr id="row_1_74_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarSupplyNode.html" target="_self">GlobalVarSupplyNode</a></td><td class="desc"><a class="el" href="classtvm_1_1GlobalVarSupply.html" title="Managed reference class to GlobalVarSupplyNode. ">GlobalVarSupply</a> can be used to generate unique GlobalVars </td></tr>
-<tr id="row_1_75_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IncompleteType.html" target="_self">IncompleteType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1IncompleteTypeNode.html" title="Intermediate values that is used to indicate incomplete type during type inference. ">IncompleteTypeNode</a> </td></tr>
-<tr id="row_1_76_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IncompleteTypeNode.html" target="_self">IncompleteTypeNode</a></td><td class="desc">Intermediate values that is used to indicate incomplete type during type inference </td></tr>
-<tr id="row_1_77_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Integer.html" target="_self">Integer</a></td><td class="desc">Container of constant int that adds more constructors </td></tr>
-<tr id="row_1_78_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IntImm.html" target="_self">IntImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IntImmNode.html" title="Constant integer literals in the program. ">IntImmNode</a> </td></tr>
-<tr id="row_1_79_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IntImmNode.html" target="_self">IntImmNode</a></td><td class="desc">Constant integer literals in the program </td></tr>
-<tr id="row_1_80_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IRModule.html" target="_self">IRModule</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IRModuleNode.html" title="IRModule that holds functions and type definitions. ">IRModuleNode</a> </td></tr>
-<tr id="row_1_81_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IRModuleNode.html" target="_self">IRModuleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1IRModule.html" title="Managed reference class to IRModuleNode. ">IRModule</a> that holds functions and type definitions </td></tr>
-<tr id="row_1_82_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MapValuePath.html" target="_self">MapValuePath</a></td><td class="desc"></td></tr>
-<tr id="row_1_83_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MapValuePathNode.html" target="_self">MapValuePathNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_84_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MemoryInfo.html" target="_self">MemoryInfo</a></td><td class="desc">Defines memory info </td></tr>
-<tr id="row_1_85_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MemoryInfoNode.html" target="_self">MemoryInfoNode</a></td><td class="desc">Memory information of special memory region. Use <a class="el" href="classtvm_1_1MemoryInfo.html" title="Defines memory info. ">MemoryInfo</a> as its container type </td></tr>
-<tr id="row_1_86_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingArrayElementPath.html" target="_self">MissingArrayElementPath</a></td><td class="desc"></td></tr>
-<tr id="row_1_87_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingArrayElementPathNode.html" target="_self">MissingArrayElementPathNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_88_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingMapEntryPath.html" target="_self">MissingMapEntryPath</a></td><td class="desc"></td></tr>
-<tr id="row_1_89_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingMapEntryPathNode.html" target="_self">MissingMapEntryPathNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_90_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NameSupply.html" target="_self">NameSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1NameSupplyNode.html" title="NameSupply can be used to generate unique names. ">NameSupplyNode</a> </td></tr>
-<tr id="row_1_91_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NameSupplyNode.html" target="_self">NameSupplyNode</a></td><td class="desc"><a class="el" href="classtvm_1_1NameSupply.html" title="Managed reference class to NameSupplyNode. ">NameSupply</a> can be used to generate unique names </td></tr>
-<tr id="row_1_92_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1NDArrayContainerTrait.html" target="_self">NDArrayContainerTrait</a></td><td class="desc"></td></tr>
-<tr id="row_1_93_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NodeFunctor.html" target="_self">NodeFunctor</a></td><td class="desc">A dynamically dispatched functor on the type of the first argument </td></tr>
-<tr id="row_1_94_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NodeFunctor_3_01R_07const_01ObjectRef_01_6n_00_01Args_8_8_8_08_4.html" target="_self">NodeFunctor&lt; R(const ObjectRef &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_95_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPath.html" target="_self">ObjectPath</a></td><td class="desc"></td></tr>
-<tr id="row_1_96_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathNode.html" target="_self">ObjectPathNode</a></td><td class="desc">Path to an object from some root object </td></tr>
-<tr id="row_1_97_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathPair.html" target="_self">ObjectPathPair</a></td><td class="desc"></td></tr>
-<tr id="row_1_98_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathPairNode.html" target="_self">ObjectPathPairNode</a></td><td class="desc">Pair of <code><a class="el" href="classtvm_1_1ObjectPath.html">ObjectPath</a></code>s, one for each object being tested for structural equality </td></tr>
-<tr id="row_1_99_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Op.html" target="_self">Op</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1OpNode.html" title="Primitive Op(builtin intrinsics) ">OpNode</a> </td></tr>
-<tr id="row_1_100_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpAttrMap.html" target="_self">OpAttrMap</a></td><td class="desc">Map&lt;Op,ValueType&gt; used to store meta-information about <a class="el" href="classtvm_1_1Op.html" title="Managed reference class to OpNode. ">Op</a> </td></tr>
-<tr id="row_1_101_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpNode.html" target="_self">OpNode</a></td><td class="desc">Primitive Op(builtin intrinsics) </td></tr>
-<tr id="row_1_102_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpRegEntry.html" target="_self">OpRegEntry</a></td><td class="desc">Helper structure to register operators </td></tr>
-<tr id="row_1_103_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PointerType.html" target="_self">PointerType</a></td><td class="desc"></td></tr>
-<tr id="row_1_104_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PointerTypeNode.html" target="_self">PointerTypeNode</a></td><td class="desc">Low-level raw pointer type </td></tr>
-<tr id="row_1_105_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfo.html" target="_self">PoolInfo</a></td><td class="desc">Base class for <a class="el" href="classtvm_1_1WorkspacePoolInfo.html">WorkspacePoolInfo</a> and <a class="el" href="classtvm_1_1ConstantPoolInfo.html">ConstantPoolInfo</a> </td></tr>
-<tr id="row_1_106_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1PoolInfoNode.html" target="_self">PoolInfoNode</a></td><td class="desc">Describes a pool of memory accessible by one or more targets </td></tr>
-<tr id="row_1_107_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfoProperties.html" target="_self">PoolInfoProperties</a></td><td class="desc"></td></tr>
-<tr id="row_1_108_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1PoolInfoPropertiesNode.html" target="_self">PoolInfoPropertiesNode</a></td><td class="desc">Describes a pool of memory properties </td></tr>
-<tr id="row_1_109_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimExpr.html" target="_self">PrimExpr</a></td><td class="desc">Reference to <a class="el" href="classtvm_1_1PrimExprNode.html" title="Base node of all primitive expressions. ">PrimExprNode</a> </td></tr>
-<tr id="row_1_110_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimExprNode.html" target="_self">PrimExprNode</a></td><td class="desc">Base node of all primitive expressions </td></tr>
-<tr id="row_1_111_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimType.html" target="_self">PrimType</a></td><td class="desc"></td></tr>
-<tr id="row_1_112_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimTypeNode.html" target="_self">PrimTypeNode</a></td><td class="desc">Primitive data types used in the low-level IR </td></tr>
-<tr id="row_1_113_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Range.html" target="_self">Range</a></td><td class="desc"><a class="el" href="classtvm_1_1Range.html" title="Range constainer. ">Range</a> constainer </td></tr>
-<tr id="row_1_114_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RangeNode.html" target="_self">RangeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Range.html" title="Range constainer. ">Range</a> over one dimension </td></tr>
-<tr id="row_1_115_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_115_" class="arrow" onclick="toggleFolder('1_115_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable.html" target="_self">ReflectionVTable</a></td><td class="desc">Virtual function table to support IR/AST node reflection </td></tr>
-<tr id="row_1_115_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" target="_self">Registry</a></td><td class="desc"><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" title="Registry of a reflection table. ">Registry</a> of a reflection table </td></tr>
-<tr id="row_1_116_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayExpr.html" target="_self">RelayExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayExprNode.html" title="Base node of all non-primitive expressions. ">RelayExprNode</a> </td></tr>
-<tr id="row_1_117_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayExprNode.html" target="_self">RelayExprNode</a></td><td class="desc">Base node of all non-primitive expressions </td></tr>
-<tr id="row_1_118_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayRefType.html" target="_self">RelayRefType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayRefTypeNode.html" title="Reference Type High-level Relay IR. ">RelayRefTypeNode</a> </td></tr>
-<tr id="row_1_119_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayRefTypeNode.html" target="_self">RelayRefTypeNode</a></td><td class="desc">Reference <a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> High-level Relay IR </td></tr>
-<tr id="row_1_120_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReprLegacyPrinter.html" target="_self">ReprLegacyPrinter</a></td><td class="desc">Legacy behavior of <a class="el" href="classtvm_1_1ReprPrinter.html" title="A printer class to print the AST/IR nodes. ">ReprPrinter</a> </td></tr>
-<tr id="row_1_121_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReprPrinter.html" target="_self">ReprPrinter</a></td><td class="desc">A printer class to print the AST/IR nodes </td></tr>
-<tr id="row_1_122_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RootPath.html" target="_self">RootPath</a></td><td class="desc"></td></tr>
-<tr id="row_1_123_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RootPathNode.html" target="_self">RootPathNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_124_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualHandlerDefault.html" target="_self">SEqualHandlerDefault</a></td><td class="desc">The default handler for equality testing </td></tr>
-<tr id="row_1_125_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_125_" class="arrow" onclick="toggleFolder('1_125_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer.html" target="_self">SEqualReducer</a></td><td class="desc">A Reducer class to reduce the structural equality result of two objects </td></tr>
-<tr id="row_1_125_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html" target="_self">Handler</a></td><td class="desc">Internal handler that defines custom behaviors. </td></tr>
-<tr id="row_1_126_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashHandlerDefault.html" target="_self">SHashHandlerDefault</a></td><td class="desc">The default handler for hash key computation </td></tr>
-<tr id="row_1_127_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_127_" class="arrow" onclick="toggleFolder('1_127_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer.html" target="_self">SHashReducer</a></td><td class="desc">A Reducer class to reduce the structural hash value </td></tr>
-<tr id="row_1_127_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html" target="_self">Handler</a></td><td class="desc">Internal handler that defines custom behaviors </td></tr>
-<tr id="row_1_128_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SourceName.html" target="_self">SourceName</a></td><td class="desc">The source name of a file span </td></tr>
-<tr id="row_1_129_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SourceNameNode.html" target="_self">SourceNameNode</a></td><td class="desc">The name of a source fragment </td></tr>
-<tr id="row_1_130_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Span.html" target="_self">Span</a></td><td class="desc"></td></tr>
-<tr id="row_1_131_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SpanNode.html" target="_self">SpanNode</a></td><td class="desc">Stores locations in frontend source that generated a node </td></tr>
-<tr id="row_1_132_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1StructuralEqual.html" target="_self">StructuralEqual</a></td><td class="desc">Content-aware structural equality comparator for objects </td></tr>
-<tr id="row_1_133_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1StructuralHash.html" target="_self">StructuralHash</a></td><td class="desc">Content-aware structural hasing </td></tr>
-<tr id="row_1_134_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Target.html" target="_self">Target</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetNode.html" title="Compilation target. ">TargetNode</a> </td></tr>
-<tr id="row_1_135_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKind.html" target="_self">TargetKind</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetKindNode.html" title="Target kind, specifies the kind of the target. ">TargetKindNode</a> </td></tr>
-<tr id="row_1_136_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindAttrMap.html" target="_self">TargetKindAttrMap</a></td><td class="desc">Map&lt;TargetKind, ValueType&gt; used to store meta-information about <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a> </td></tr>
-<tr id="row_1_137_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindNode.html" target="_self">TargetKindNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Target.html" title="Managed reference class to TargetNode. ">Target</a> kind, specifies the kind of the target </td></tr>
-<tr id="row_1_138_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindRegEntry.html" target="_self">TargetKindRegEntry</a></td><td class="desc">Helper structure to register <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a> </td></tr>
-<tr id="row_1_139_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetNode.html" target="_self">TargetNode</a></td><td class="desc">Compilation target </td></tr>
-<tr id="row_1_140_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTag.html" target="_self">TargetTag</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetTagNode.html" title="A target tag. ">TargetTagNode</a> </td></tr>
-<tr id="row_1_141_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTagNode.html" target="_self">TargetTagNode</a></td><td class="desc">A target tag </td></tr>
-<tr id="row_1_142_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTagRegEntry.html" target="_self">TargetTagRegEntry</a></td><td class="desc"></td></tr>
-<tr id="row_1_143_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorAffineType.html" target="_self">TensorAffineType</a></td><td class="desc">Managed reference to AffineTypes </td></tr>
-<tr id="row_1_144_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorAffineTypeNode.html" target="_self">TensorAffineTypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TensorAffineType.html" title="Managed reference to AffineTypes. ">TensorAffineType</a> representation </td></tr>
-<tr id="row_1_145_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorType.html" target="_self">TensorType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TensorTypeNode.html" title="This is the most commonly used type in relay. TensorType have a fixed dimension, data type...">TensorTypeNode</a> </td></tr>
-<tr id="row_1_146_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorTypeNode.html" target="_self">TensorTypeNode</a></td><td class="desc">This is the most commonly used type in relay. <a class="el" href="classtvm_1_1TensorType.html" title="Managed reference to TensorTypeNode. ">TensorType</a> have a fixed dimension, data type </td></tr>
-<tr id="row_1_147_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleAffineType.html" target="_self">TupleAffineType</a></td><td class="desc">Managed reference to TupleAffineTypes </td></tr>
-<tr id="row_1_148_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleAffineTypeNode.html" target="_self">TupleAffineTypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TupleAffineType.html" title="Managed reference to TupleAffineTypes. ">TupleAffineType</a> representation </td></tr>
-<tr id="row_1_149_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleType.html" target="_self">TupleType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TupleTypeNode.html" title="The type of tuple values. ">TupleTypeNode</a> </td></tr>
-<tr id="row_1_150_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleTypeNode.html" target="_self">TupleTypeNode</a></td><td class="desc">The type of tuple values </td></tr>
-<tr id="row_1_151_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Type.html" target="_self">Type</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeNode.html" title="Type is the base type of all types. ">TypeNode</a> </td></tr>
-<tr id="row_1_152_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeCall.html" target="_self">TypeCall</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeCallNode.html" title="Type function application. ">TypeCallNode</a> </td></tr>
-<tr id="row_1_153_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeCallNode.html" target="_self">TypeCallNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> function application </td></tr>
-<tr id="row_1_154_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeConstraint.html" target="_self">TypeConstraint</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeConstraintNode.html" title="Potential Constraints in a function. ">TypeConstraintNode</a> </td></tr>
-<tr id="row_1_155_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeConstraintNode.html" target="_self">TypeConstraintNode</a></td><td class="desc">Potential Constraints in a function </td></tr>
-<tr id="row_1_156_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeData.html" target="_self">TypeData</a></td><td class="desc">Stores all data for an Algebraic Data <a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> (ADT) </td></tr>
-<tr id="row_1_157_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeDataNode.html" target="_self">TypeDataNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TypeData.html" title="Stores all data for an Algebraic Data Type (ADT). ">TypeData</a> container node </td></tr>
-<tr id="row_1_158_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypedEnvFunc.html" target="_self">TypedEnvFunc</a></td><td class="desc">Please refer to <a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html#TypedEnvFuncAnchor">TypedEnvFunc&lt;R(Args..)&gt;</a> </td></tr>
-<tr id="row_1_159_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html" target="_self">TypedEnvFunc&lt; R(Args...)&gt;</a></td><td class="desc">A typed version of <a class="el" href="classtvm_1_1EnvFunc.html" title="Managed reference to EnvFuncNode. ">EnvFunc</a>. It is backed by a GlobalFuncNode inte [...]
-<tr id="row_1_160_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor.html" target="_self">TypeFunctor</a></td><td class="desc"></td></tr>
-<tr id="row_1_161_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html" target="_self">TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
-<tr id="row_1_162_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeMutator.html" target="_self">TypeMutator</a></td><td class="desc"><a class="el" href="classtvm_1_1TypeMutator.html" title="TypeMutator that mutates expressions. ">TypeMutator</a> that mutates expressions </td></tr>
-<tr id="row_1_163_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeNode.html" target="_self">TypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> is the base type of all types </td></tr>
-<tr id="row_1_164_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeRelation.html" target="_self">TypeRelation</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeRelationNode.html" title="User defined type relation, it is an input-output relation on types. ">TypeRelationNode</a> </td></tr>
-<tr id="row_1_165_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeRelationNode.html" target="_self">TypeRelationNode</a></td><td class="desc">User defined type relation, it is an input-output relation on types </td></tr>
-<tr id="row_1_166_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeReporter.html" target="_self">TypeReporter</a></td><td class="desc">Container class of <a class="el" href="classtvm_1_1TypeReporter.html" title="Container class of TypeReporter. ">TypeReporter</a> </td></tr>
-<tr id="row_1_167_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeReporterNode.html" target="_self">TypeReporterNode</a></td><td class="desc">Reporter that reports back to the type resolution information </td></tr>
-<tr id="row_1_168_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVar.html" target="_self">TypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeVarNode.html" title="Type parameter in functions. ">TypeVarNode</a> </td></tr>
-<tr id="row_1_169_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVarNode.html" target="_self">TypeVarNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> parameter in functions </td></tr>
-<tr id="row_1_170_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVisitor.html" target="_self">TypeVisitor</a></td><td class="desc">A type visitor that recursively visit types </td></tr>
-<tr id="row_1_171_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1UnknownAttributeAccessPath.html" target="_self">UnknownAttributeAccessPath</a></td><td class="desc"></td></tr>
-<tr id="row_1_172_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1UnknownAttributeAccessPathNode.html" target="_self">UnknownAttributeAccessPathNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_173_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDevice.html" target="_self">VirtualDevice</a></td><td class="desc">Managed reference class to <code><a class="el" href="classtvm_1_1VirtualDeviceNode.html" title="Describes at compile time the constraints on where data is to be stored at runtime down to the (virtu.. [...]
-<tr id="row_1_174_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDeviceCache.html" target="_self">VirtualDeviceCache</a></td><td class="desc">A cache of <code>VirtualDevices</code>. This can be used: </td></tr>
-<tr id="row_1_175_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDeviceNode.html" target="_self">VirtualDeviceNode</a></td><td class="desc">Describes at compile time the constraints on where data is to be stored at runtime down to the (virtual) device and memory scope level, and how to compile code to compute that data. Used by t [...]
-<tr id="row_1_176_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1With.html" target="_self">With</a></td><td class="desc">RAII wrapper function to enter and exit a context object similar to python's with syntax </td></tr>
-<tr id="row_1_177_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspaceMemoryPools.html" target="_self">WorkspaceMemoryPools</a></td><td class="desc"></td></tr>
-<tr id="row_1_178_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1WorkspaceMemoryPoolsNode.html" target="_self">WorkspaceMemoryPoolsNode</a></td><td class="desc"></td></tr>
-<tr id="row_1_179_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspacePoolInfo.html" target="_self">WorkspacePoolInfo</a></td><td class="desc"></td></tr>
-<tr id="row_1_180_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1WorkspacePoolInfoNode.html" target="_self">WorkspacePoolInfoNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_41_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantInfo.html" target="_self">ConstantInfo</a></td><td class="desc"></td></tr>
+<tr id="row_1_42_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1ConstantInfoNode.html" target="_self">ConstantInfoNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_43_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantMemoryPools.html" target="_self">ConstantMemoryPools</a></td><td class="desc"></td></tr>
+<tr id="row_1_44_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1ConstantMemoryPoolsNode.html" target="_self">ConstantMemoryPoolsNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_45_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstantPoolInfo.html" target="_self">ConstantPoolInfo</a></td><td class="desc"></td></tr>
+<tr id="row_1_46_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1ConstantPoolInfoNode.html" target="_self">ConstantPoolInfoNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_47_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Constructor.html" target="_self">Constructor</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1ConstructorNode.html" title="ADT constructor. Constructors compare by pointer equality. ">ConstructorNode</a> </td></tr>
+<tr id="row_1_48_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ConstructorNode.html" target="_self">ConstructorNode</a></td><td class="desc">ADT constructor. Constructors compare by pointer equality </td></tr>
+<tr id="row_1_49_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Diagnostic.html" target="_self">Diagnostic</a></td><td class="desc"></td></tr>
+<tr id="row_1_50_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticBuilder.html" target="_self">DiagnosticBuilder</a></td><td class="desc">A wrapper around std::stringstream to build a diagnostic </td></tr>
+<tr id="row_1_51_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticContext.html" target="_self">DiagnosticContext</a></td><td class="desc"></td></tr>
+<tr id="row_1_52_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticContextNode.html" target="_self">DiagnosticContextNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_53_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticNode.html" target="_self">DiagnosticNode</a></td><td class="desc">A compiler diagnostic message </td></tr>
+<tr id="row_1_54_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticRenderer.html" target="_self">DiagnosticRenderer</a></td><td class="desc"></td></tr>
+<tr id="row_1_55_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DiagnosticRendererNode.html" target="_self">DiagnosticRendererNode</a></td><td class="desc">Display diagnostics in a given display format </td></tr>
+<tr id="row_1_56_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DictAttrs.html" target="_self">DictAttrs</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1DictAttrsNode.html" title="Specialized attribute type that is backed by a map. The DictAttrsNode implements the Attrs behavior...">DictAttrsNode</a> </td></tr>
+<tr id="row_1_57_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1DictAttrsNode.html" target="_self">DictAttrsNode</a></td><td class="desc">Specialized attribute type that is backed by a map. The <a class="el" href="classtvm_1_1DictAttrsNode.html" title="Specialized attribute type that is backed by a map. The DictAttrsNode implements the  [...]
+<tr id="row_1_58_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1EnvFunc.html" target="_self">EnvFunc</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1EnvFuncNode.html" title="A serializable function backed by TVM&#39;s global environment. ">EnvFuncNode</a> </td></tr>
+<tr id="row_1_59_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1EnvFuncNode.html" target="_self">EnvFuncNode</a></td><td class="desc">A serializable function backed by TVM's global environment </td></tr>
+<tr id="row_1_60_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FloatImm.html" target="_self">FloatImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1FloatImmNode.html" title="Constant floating point literals in the program. ">FloatImmNode</a> </td></tr>
+<tr id="row_1_61_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FloatImmNode.html" target="_self">FloatImmNode</a></td><td class="desc">Constant floating point literals in the program </td></tr>
+<tr id="row_1_62_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FuncType.html" target="_self">FuncType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1FuncTypeNode.html" title="Function type. ">FuncTypeNode</a> </td></tr>
+<tr id="row_1_63_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1FuncTypeNode.html" target="_self">FuncTypeNode</a></td><td class="desc">Function type </td></tr>
+<tr id="row_1_64_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GenericFunc.html" target="_self">GenericFunc</a></td><td class="desc">Generic function that can be specialized on a per-target basis </td></tr>
+<tr id="row_1_65_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GenericFuncNode.html" target="_self">GenericFuncNode</a></td><td class="desc">Represents a generic function that can be specialized on a per-target basis </td></tr>
+<tr id="row_1_66_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalTypeVar.html" target="_self">GlobalTypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalTypeVarNode.html" title="A global type variable that is used for defining new types or type aliases. ">GlobalTypeVarNode</a> </td></tr>
+<tr id="row_1_67_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalTypeVarNode.html" target="_self">GlobalTypeVarNode</a></td><td class="desc">A global type variable that is used for defining new types or type aliases </td></tr>
+<tr id="row_1_68_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVar.html" target="_self">GlobalVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1GlobalVarNode.html" title="Global variable that lives in the top-level module. ">GlobalVarNode</a> </td></tr>
+<tr id="row_1_69_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarNode.html" target="_self">GlobalVarNode</a></td><td class="desc">Global variable that lives in the top-level module </td></tr>
+<tr id="row_1_70_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarSupply.html" target="_self">GlobalVarSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1GlobalVarSupplyNode.html" title="GlobalVarSupply can be used to generate unique GlobalVars. ">GlobalVarSupplyNode</a> </td></tr>
+<tr id="row_1_71_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1GlobalVarSupplyNode.html" target="_self">GlobalVarSupplyNode</a></td><td class="desc"><a class="el" href="classtvm_1_1GlobalVarSupply.html" title="Managed reference class to GlobalVarSupplyNode. ">GlobalVarSupply</a> can be used to generate unique GlobalVars </td></tr>
+<tr id="row_1_72_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IncompleteType.html" target="_self">IncompleteType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1IncompleteTypeNode.html" title="Intermediate values that is used to indicate incomplete type during type inference. ">IncompleteTypeNode</a> </td></tr>
+<tr id="row_1_73_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IncompleteTypeNode.html" target="_self">IncompleteTypeNode</a></td><td class="desc">Intermediate values that is used to indicate incomplete type during type inference </td></tr>
+<tr id="row_1_74_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Integer.html" target="_self">Integer</a></td><td class="desc">Container of constant int that adds more constructors </td></tr>
+<tr id="row_1_75_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IntImm.html" target="_self">IntImm</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IntImmNode.html" title="Constant integer literals in the program. ">IntImmNode</a> </td></tr>
+<tr id="row_1_76_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IntImmNode.html" target="_self">IntImmNode</a></td><td class="desc">Constant integer literals in the program </td></tr>
+<tr id="row_1_77_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IRModule.html" target="_self">IRModule</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1IRModuleNode.html" title="IRModule that holds functions and type definitions. ">IRModuleNode</a> </td></tr>
+<tr id="row_1_78_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1IRModuleNode.html" target="_self">IRModuleNode</a></td><td class="desc"><a class="el" href="classtvm_1_1IRModule.html" title="Managed reference class to IRModuleNode. ">IRModule</a> that holds functions and type definitions </td></tr>
+<tr id="row_1_79_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MapValuePath.html" target="_self">MapValuePath</a></td><td class="desc"></td></tr>
+<tr id="row_1_80_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MapValuePathNode.html" target="_self">MapValuePathNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_81_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MemoryInfo.html" target="_self">MemoryInfo</a></td><td class="desc">Defines memory info </td></tr>
+<tr id="row_1_82_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MemoryInfoNode.html" target="_self">MemoryInfoNode</a></td><td class="desc">Memory information of special memory region. Use <a class="el" href="classtvm_1_1MemoryInfo.html" title="Defines memory info. ">MemoryInfo</a> as its container type </td></tr>
+<tr id="row_1_83_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingArrayElementPath.html" target="_self">MissingArrayElementPath</a></td><td class="desc"></td></tr>
+<tr id="row_1_84_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingArrayElementPathNode.html" target="_self">MissingArrayElementPathNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_85_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingMapEntryPath.html" target="_self">MissingMapEntryPath</a></td><td class="desc"></td></tr>
+<tr id="row_1_86_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1MissingMapEntryPathNode.html" target="_self">MissingMapEntryPathNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_87_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NameSupply.html" target="_self">NameSupply</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1NameSupplyNode.html" title="NameSupply can be used to generate unique names. ">NameSupplyNode</a> </td></tr>
+<tr id="row_1_88_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NameSupplyNode.html" target="_self">NameSupplyNode</a></td><td class="desc"><a class="el" href="classtvm_1_1NameSupply.html" title="Managed reference class to NameSupplyNode. ">NameSupply</a> can be used to generate unique names </td></tr>
+<tr id="row_1_89_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1NDArrayContainerTrait.html" target="_self">NDArrayContainerTrait</a></td><td class="desc"></td></tr>
+<tr id="row_1_90_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NodeFunctor.html" target="_self">NodeFunctor</a></td><td class="desc">A dynamically dispatched functor on the type of the first argument </td></tr>
+<tr id="row_1_91_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1NodeFunctor_3_01R_07const_01ObjectRef_01_6n_00_01Args_8_8_8_08_4.html" target="_self">NodeFunctor&lt; R(const ObjectRef &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_92_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPath.html" target="_self">ObjectPath</a></td><td class="desc"></td></tr>
+<tr id="row_1_93_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathNode.html" target="_self">ObjectPathNode</a></td><td class="desc">Path to an object from some root object </td></tr>
+<tr id="row_1_94_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathPair.html" target="_self">ObjectPathPair</a></td><td class="desc"></td></tr>
+<tr id="row_1_95_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ObjectPathPairNode.html" target="_self">ObjectPathPairNode</a></td><td class="desc">Pair of <code><a class="el" href="classtvm_1_1ObjectPath.html">ObjectPath</a></code>s, one for each object being tested for structural equality </td></tr>
+<tr id="row_1_96_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Op.html" target="_self">Op</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1OpNode.html" title="Primitive Op(builtin intrinsics) ">OpNode</a> </td></tr>
+<tr id="row_1_97_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpAttrMap.html" target="_self">OpAttrMap</a></td><td class="desc">Map&lt;Op,ValueType&gt; used to store meta-information about <a class="el" href="classtvm_1_1Op.html" title="Managed reference class to OpNode. ">Op</a> </td></tr>
+<tr id="row_1_98_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpNode.html" target="_self">OpNode</a></td><td class="desc">Primitive Op(builtin intrinsics) </td></tr>
+<tr id="row_1_99_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1OpRegEntry.html" target="_self">OpRegEntry</a></td><td class="desc">Helper structure to register operators </td></tr>
+<tr id="row_1_100_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PointerType.html" target="_self">PointerType</a></td><td class="desc"></td></tr>
+<tr id="row_1_101_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PointerTypeNode.html" target="_self">PointerTypeNode</a></td><td class="desc">Low-level raw pointer type </td></tr>
+<tr id="row_1_102_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfo.html" target="_self">PoolInfo</a></td><td class="desc">Base class for <a class="el" href="classtvm_1_1WorkspacePoolInfo.html">WorkspacePoolInfo</a> and <a class="el" href="classtvm_1_1ConstantPoolInfo.html">ConstantPoolInfo</a> </td></tr>
+<tr id="row_1_103_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1PoolInfoNode.html" target="_self">PoolInfoNode</a></td><td class="desc">Describes a pool of memory accessible by one or more targets </td></tr>
+<tr id="row_1_104_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PoolInfoProperties.html" target="_self">PoolInfoProperties</a></td><td class="desc"></td></tr>
+<tr id="row_1_105_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1PoolInfoPropertiesNode.html" target="_self">PoolInfoPropertiesNode</a></td><td class="desc">Describes a pool of memory properties </td></tr>
+<tr id="row_1_106_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimExpr.html" target="_self">PrimExpr</a></td><td class="desc">Reference to <a class="el" href="classtvm_1_1PrimExprNode.html" title="Base node of all primitive expressions. ">PrimExprNode</a> </td></tr>
+<tr id="row_1_107_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimExprNode.html" target="_self">PrimExprNode</a></td><td class="desc">Base node of all primitive expressions </td></tr>
+<tr id="row_1_108_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimType.html" target="_self">PrimType</a></td><td class="desc"></td></tr>
+<tr id="row_1_109_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1PrimTypeNode.html" target="_self">PrimTypeNode</a></td><td class="desc">Primitive data types used in the low-level IR </td></tr>
+<tr id="row_1_110_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Range.html" target="_self">Range</a></td><td class="desc"><a class="el" href="classtvm_1_1Range.html" title="Range constainer. ">Range</a> constainer </td></tr>
+<tr id="row_1_111_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RangeNode.html" target="_self">RangeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Range.html" title="Range constainer. ">Range</a> over one dimension </td></tr>
+<tr id="row_1_112_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_112_" class="arrow" onclick="toggleFolder('1_112_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable.html" target="_self">ReflectionVTable</a></td><td class="desc">Virtual function table to support IR/AST node reflection </td></tr>
+<tr id="row_1_112_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" target="_self">Registry</a></td><td class="desc"><a class="el" href="classtvm_1_1ReflectionVTable_1_1Registry.html" title="Registry of a reflection table. ">Registry</a> of a reflection table </td></tr>
+<tr id="row_1_113_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayExpr.html" target="_self">RelayExpr</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayExprNode.html" title="Base node of all non-primitive expressions. ">RelayExprNode</a> </td></tr>
+<tr id="row_1_114_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayExprNode.html" target="_self">RelayExprNode</a></td><td class="desc">Base node of all non-primitive expressions </td></tr>
+<tr id="row_1_115_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayRefType.html" target="_self">RelayRefType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1RelayRefTypeNode.html" title="Reference Type High-level Relay IR. ">RelayRefTypeNode</a> </td></tr>
+<tr id="row_1_116_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RelayRefTypeNode.html" target="_self">RelayRefTypeNode</a></td><td class="desc">Reference <a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> High-level Relay IR </td></tr>
+<tr id="row_1_117_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReprLegacyPrinter.html" target="_self">ReprLegacyPrinter</a></td><td class="desc">Legacy behavior of <a class="el" href="classtvm_1_1ReprPrinter.html" title="A printer class to print the AST/IR nodes. ">ReprPrinter</a> </td></tr>
+<tr id="row_1_118_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1ReprPrinter.html" target="_self">ReprPrinter</a></td><td class="desc">A printer class to print the AST/IR nodes </td></tr>
+<tr id="row_1_119_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RootPath.html" target="_self">RootPath</a></td><td class="desc"></td></tr>
+<tr id="row_1_120_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1RootPathNode.html" target="_self">RootPathNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_121_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualHandlerDefault.html" target="_self">SEqualHandlerDefault</a></td><td class="desc">The default handler for equality testing </td></tr>
+<tr id="row_1_122_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_122_" class="arrow" onclick="toggleFolder('1_122_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer.html" target="_self">SEqualReducer</a></td><td class="desc">A Reducer class to reduce the structural equality result of two objects </td></tr>
+<tr id="row_1_122_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SEqualReducer_1_1Handler.html" target="_self">Handler</a></td><td class="desc">Internal handler that defines custom behaviors. </td></tr>
+<tr id="row_1_123_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashHandlerDefault.html" target="_self">SHashHandlerDefault</a></td><td class="desc">The default handler for hash key computation </td></tr>
+<tr id="row_1_124_" class="even" style="display:none;"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span id="arr_1_124_" class="arrow" onclick="toggleFolder('1_124_')">&#9658;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer.html" target="_self">SHashReducer</a></td><td class="desc">A Reducer class to reduce the structural hash value </td></tr>
+<tr id="row_1_124_0_" class="even" style="display:none;"><td class="entry"><span style="width:48px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SHashReducer_1_1Handler.html" target="_self">Handler</a></td><td class="desc">Internal handler that defines custom behaviors </td></tr>
+<tr id="row_1_125_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SourceName.html" target="_self">SourceName</a></td><td class="desc">The source name of a file span </td></tr>
+<tr id="row_1_126_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SourceNameNode.html" target="_self">SourceNameNode</a></td><td class="desc">The name of a source fragment </td></tr>
+<tr id="row_1_127_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Span.html" target="_self">Span</a></td><td class="desc"></td></tr>
+<tr id="row_1_128_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1SpanNode.html" target="_self">SpanNode</a></td><td class="desc">Stores locations in frontend source that generated a node </td></tr>
+<tr id="row_1_129_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1StructuralEqual.html" target="_self">StructuralEqual</a></td><td class="desc">Content-aware structural equality comparator for objects </td></tr>
+<tr id="row_1_130_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1StructuralHash.html" target="_self">StructuralHash</a></td><td class="desc">Content-aware structural hasing </td></tr>
+<tr id="row_1_131_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Target.html" target="_self">Target</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetNode.html" title="Compilation target. ">TargetNode</a> </td></tr>
+<tr id="row_1_132_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKind.html" target="_self">TargetKind</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetKindNode.html" title="Target kind, specifies the kind of the target. ">TargetKindNode</a> </td></tr>
+<tr id="row_1_133_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindAttrMap.html" target="_self">TargetKindAttrMap</a></td><td class="desc">Map&lt;TargetKind, ValueType&gt; used to store meta-information about <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a> </td></tr>
+<tr id="row_1_134_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindNode.html" target="_self">TargetKindNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Target.html" title="Managed reference class to TargetNode. ">Target</a> kind, specifies the kind of the target </td></tr>
+<tr id="row_1_135_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetKindRegEntry.html" target="_self">TargetKindRegEntry</a></td><td class="desc">Helper structure to register <a class="el" href="classtvm_1_1TargetKind.html" title="Managed reference class to TargetKindNode. ">TargetKind</a> </td></tr>
+<tr id="row_1_136_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetNode.html" target="_self">TargetNode</a></td><td class="desc">Compilation target </td></tr>
+<tr id="row_1_137_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTag.html" target="_self">TargetTag</a></td><td class="desc">Managed reference class to <a class="el" href="classtvm_1_1TargetTagNode.html" title="A target tag. ">TargetTagNode</a> </td></tr>
+<tr id="row_1_138_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTagNode.html" target="_self">TargetTagNode</a></td><td class="desc">A target tag </td></tr>
+<tr id="row_1_139_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TargetTagRegEntry.html" target="_self">TargetTagRegEntry</a></td><td class="desc"></td></tr>
+<tr id="row_1_140_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorAffineType.html" target="_self">TensorAffineType</a></td><td class="desc">Managed reference to AffineTypes </td></tr>
+<tr id="row_1_141_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorAffineTypeNode.html" target="_self">TensorAffineTypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TensorAffineType.html" title="Managed reference to AffineTypes. ">TensorAffineType</a> representation </td></tr>
+<tr id="row_1_142_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorType.html" target="_self">TensorType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TensorTypeNode.html" title="This is the most commonly used type in relay. TensorType have a fixed dimension, data type...">TensorTypeNode</a> </td></tr>
+<tr id="row_1_143_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TensorTypeNode.html" target="_self">TensorTypeNode</a></td><td class="desc">This is the most commonly used type in relay. <a class="el" href="classtvm_1_1TensorType.html" title="Managed reference to TensorTypeNode. ">TensorType</a> have a fixed dimension, data type </td></tr>
+<tr id="row_1_144_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleAffineType.html" target="_self">TupleAffineType</a></td><td class="desc">Managed reference to TupleAffineTypes </td></tr>
+<tr id="row_1_145_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleAffineTypeNode.html" target="_self">TupleAffineTypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TupleAffineType.html" title="Managed reference to TupleAffineTypes. ">TupleAffineType</a> representation </td></tr>
+<tr id="row_1_146_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleType.html" target="_self">TupleType</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TupleTypeNode.html" title="The type of tuple values. ">TupleTypeNode</a> </td></tr>
+<tr id="row_1_147_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TupleTypeNode.html" target="_self">TupleTypeNode</a></td><td class="desc">The type of tuple values </td></tr>
+<tr id="row_1_148_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1Type.html" target="_self">Type</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeNode.html" title="Type is the base type of all types. ">TypeNode</a> </td></tr>
+<tr id="row_1_149_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeCall.html" target="_self">TypeCall</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeCallNode.html" title="Type function application. ">TypeCallNode</a> </td></tr>
+<tr id="row_1_150_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeCallNode.html" target="_self">TypeCallNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> function application </td></tr>
+<tr id="row_1_151_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeConstraint.html" target="_self">TypeConstraint</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeConstraintNode.html" title="Potential Constraints in a function. ">TypeConstraintNode</a> </td></tr>
+<tr id="row_1_152_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeConstraintNode.html" target="_self">TypeConstraintNode</a></td><td class="desc">Potential Constraints in a function </td></tr>
+<tr id="row_1_153_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeData.html" target="_self">TypeData</a></td><td class="desc">Stores all data for an Algebraic Data <a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> (ADT) </td></tr>
+<tr id="row_1_154_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeDataNode.html" target="_self">TypeDataNode</a></td><td class="desc"><a class="el" href="classtvm_1_1TypeData.html" title="Stores all data for an Algebraic Data Type (ADT). ">TypeData</a> container node </td></tr>
+<tr id="row_1_155_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypedEnvFunc.html" target="_self">TypedEnvFunc</a></td><td class="desc">Please refer to <a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html#TypedEnvFuncAnchor">TypedEnvFunc&lt;R(Args..)&gt;</a> </td></tr>
+<tr id="row_1_156_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypedEnvFunc_3_01R_07Args_8_8_8_08_4.html" target="_self">TypedEnvFunc&lt; R(Args...)&gt;</a></td><td class="desc">A typed version of <a class="el" href="classtvm_1_1EnvFunc.html" title="Managed reference to EnvFuncNode. ">EnvFunc</a>. It is backed by a GlobalFuncNode inte [...]
+<tr id="row_1_157_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor.html" target="_self">TypeFunctor</a></td><td class="desc"></td></tr>
+<tr id="row_1_158_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeFunctor_3_01R_07const_01Type_01_6n_00_01Args_8_8_8_08_4.html" target="_self">TypeFunctor&lt; R(const Type &amp;n, Args...)&gt;</a></td><td class="desc"></td></tr>
+<tr id="row_1_159_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeMutator.html" target="_self">TypeMutator</a></td><td class="desc"><a class="el" href="classtvm_1_1TypeMutator.html" title="TypeMutator that mutates expressions. ">TypeMutator</a> that mutates expressions </td></tr>
+<tr id="row_1_160_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeNode.html" target="_self">TypeNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> is the base type of all types </td></tr>
+<tr id="row_1_161_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeRelation.html" target="_self">TypeRelation</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeRelationNode.html" title="User defined type relation, it is an input-output relation on types. ">TypeRelationNode</a> </td></tr>
+<tr id="row_1_162_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeRelationNode.html" target="_self">TypeRelationNode</a></td><td class="desc">User defined type relation, it is an input-output relation on types </td></tr>
+<tr id="row_1_163_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeReporter.html" target="_self">TypeReporter</a></td><td class="desc">Container class of <a class="el" href="classtvm_1_1TypeReporter.html" title="Container class of TypeReporter. ">TypeReporter</a> </td></tr>
+<tr id="row_1_164_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeReporterNode.html" target="_self">TypeReporterNode</a></td><td class="desc">Reporter that reports back to the type resolution information </td></tr>
+<tr id="row_1_165_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVar.html" target="_self">TypeVar</a></td><td class="desc">Managed reference to <a class="el" href="classtvm_1_1TypeVarNode.html" title="Type parameter in functions. ">TypeVarNode</a> </td></tr>
+<tr id="row_1_166_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVarNode.html" target="_self">TypeVarNode</a></td><td class="desc"><a class="el" href="classtvm_1_1Type.html" title="Managed reference to TypeNode. ">Type</a> parameter in functions </td></tr>
+<tr id="row_1_167_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1TypeVisitor.html" target="_self">TypeVisitor</a></td><td class="desc">A type visitor that recursively visit types </td></tr>
+<tr id="row_1_168_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1UnknownAttributeAccessPath.html" target="_self">UnknownAttributeAccessPath</a></td><td class="desc"></td></tr>
+<tr id="row_1_169_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1UnknownAttributeAccessPathNode.html" target="_self">UnknownAttributeAccessPathNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_170_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDevice.html" target="_self">VirtualDevice</a></td><td class="desc">Managed reference class to <code><a class="el" href="classtvm_1_1VirtualDeviceNode.html" title="Describes at compile time the constraints on where data is to be stored at runtime down to the (virtu.. [...]
+<tr id="row_1_171_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDeviceCache.html" target="_self">VirtualDeviceCache</a></td><td class="desc">A cache of <code>VirtualDevices</code>. This can be used: </td></tr>
+<tr id="row_1_172_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1VirtualDeviceNode.html" target="_self">VirtualDeviceNode</a></td><td class="desc">Describes at compile time the constraints on where data is to be stored at runtime down to the (virtual) device and memory scope level, and how to compile code to compute that data. Used by t [...]
+<tr id="row_1_173_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1With.html" target="_self">With</a></td><td class="desc">RAII wrapper function to enter and exit a context object similar to python's with syntax </td></tr>
+<tr id="row_1_174_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspaceMemoryPools.html" target="_self">WorkspaceMemoryPools</a></td><td class="desc"></td></tr>
+<tr id="row_1_175_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1WorkspaceMemoryPoolsNode.html" target="_self">WorkspaceMemoryPoolsNode</a></td><td class="desc"></td></tr>
+<tr id="row_1_176_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classtvm_1_1WorkspacePoolInfo.html" target="_self">WorkspacePoolInfo</a></td><td class="desc"></td></tr>
+<tr id="row_1_177_" class="even" style="display:none;"><td class="entry"><span style="width:32px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm_1_1WorkspacePoolInfoNode.html" target="_self">WorkspacePoolInfoNode</a></td><td class="desc"></td></tr>
 <tr id="row_2_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="classGlobalVar.html" target="_self">GlobalVar</a></td><td class="desc"></td></tr>
 <tr id="row_3_"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structMemoryManagerInterface.html" target="_self">MemoryManagerInterface</a></td><td class="desc"></td></tr>
 <tr id="row_4_" class="even"><td class="entry"><span style="width:16px;display:inline-block;">&#160;</span><span class="icona"><span class="icon">C</span></span><a class="el" href="structtvm__workspace__t.html" target="_self">tvm_workspace_t</a></td><td class="desc"></td></tr>
diff --git a/docs/reference/api/doxygen/array_8h.html b/docs/reference/api/doxygen/array_8h.html
index fcb91a94ba..9e70f04de9 100644
--- a/docs/reference/api/doxygen/array_8h.html
+++ b/docs/reference/api/doxygen/array_8h.html
@@ -90,7 +90,7 @@ Include dependency graph for array.h:</div>
 </div><div class="textblock"><div class="dynheader">
 This graph shows which files directly or indirectly include this file:</div>
 <div class="dyncontent">
-<div class="center"><iframe scrolling="no" frameborder="0" src="array_8h__dep__incl.svg" width="4808" height="1110"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
+<div class="center"><iframe scrolling="no" frameborder="0" src="array_8h__dep__incl.svg" width="4392" height="1110"><p><b>This browser is not able to show SVG: try Firefox, Chrome, Safari, or Opera instead.</b></p></iframe>
 </div>
 </div>
 </div>
diff --git a/docs/reference/api/doxygen/array_8h__dep__incl.svg b/docs/reference/api/doxygen/array_8h__dep__incl.svg
index 6ef75830d4..98e49cd36c 100644
--- a/docs/reference/api/doxygen/array_8h__dep__incl.svg
+++ b/docs/reference/api/doxygen/array_8h__dep__incl.svg
@@ -4,1335 +4,1329 @@
 <!-- Generated by graphviz version 2.40.1 (20161225.0304)
  -->
 <!-- Title: include/tvm/runtime/container/array.h Pages: 1 -->
-<svg width="3606pt" height="832pt"
- viewBox="0.00 0.00 3606.00 832.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg width="3294pt" height="832pt"
+ viewBox="0.00 0.00 3294.00 832.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
 <g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 828)">
 <title>include/tvm/runtime/container/array.h</title>
-<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-828 3602,-828 3602,4 -4,4"/>
+<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-828 3290,-828 3290,4 -4,4"/>
 <!-- Node20 -->
 <g id="node1" class="node">
 <title>Node20</title>
-<polygon fill="#bfbfbf" stroke="#000000" points="2205,-793.5 2205,-823.5 2321,-823.5 2321,-793.5 2205,-793.5"/>
-<text text-anchor="start" x="2213" y="-811.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="2263" y="-800.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/container/array.h</text>
+<polygon fill="#bfbfbf" stroke="#000000" points="1928,-793.5 1928,-823.5 2044,-823.5 2044,-793.5 1928,-793.5"/>
+<text text-anchor="start" x="1936" y="-811.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="1986" y="-800.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/container/array.h</text>
 </g>
 <!-- Node21 -->
 <g id="node2" class="node">
 <title>Node21</title>
 <g id="a_node2"><a xlink:href="ir_2adt_8h.html" target="_top" xlink:title="Algebraic data type definitions. ">
-<polygon fill="#ffffff" stroke="#000000" points="861,-670.5 861,-689.5 973,-689.5 973,-670.5 861,-670.5"/>
-<text text-anchor="middle" x="917" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/adt.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1313,-670.5 1313,-689.5 1425,-689.5 1425,-670.5 1313,-670.5"/>
+<text text-anchor="middle" x="1369" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/adt.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node21 -->
 <g id="edge1" class="edge">
 <title>Node20&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M2194.5072,-807.3851C1955.5434,-803.2458 1167.3288,-787.4081 1058,-757 1006.6755,-742.7249 953.4483,-706.8884 929.7277,-689.6105"/>
-<polygon fill="#191970" stroke="#191970" points="2194.7377,-810.8895 2204.7964,-807.562 2194.8581,-803.8905 2194.7377,-810.8895"/>
+<path fill="none" stroke="#191970" d="M1917.3481,-804.3785C1804.3324,-797.041 1584.2214,-780.2695 1510,-757 1459.4331,-741.1465 1406.2577,-706.4107 1382.2001,-689.5538"/>
+<polygon fill="#191970" stroke="#191970" points="1917.408,-807.8894 1927.6119,-805.038 1917.8569,-800.9038 1917.408,-807.8894"/>
 </g>
 <!-- Node22 -->
 <g id="node3" class="node">
 <title>Node22</title>
 <g id="a_node3"><a xlink:href="ir_2module_8h.html" target="_top" xlink:title="IRModule that holds the functions and type definitions. ">
-<polygon fill="#ffffff" stroke="#000000" points="1354.5,-609 1354.5,-628 1487.5,-628 1487.5,-609 1354.5,-609"/>
-<text text-anchor="middle" x="1421" y="-616" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/module.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1111.5,-609 1111.5,-628 1244.5,-628 1244.5,-609 1111.5,-609"/>
+<text text-anchor="middle" x="1178" y="-616" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/module.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node22 -->
-<g id="edge95" class="edge">
+<g id="edge94" class="edge">
 <title>Node20&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M2194.7056,-807.0914C2054.1257,-803.511 1737.0399,-791.7361 1635,-757 1547.8604,-727.3362 1461.0492,-654.4444 1431.5585,-628.1529"/>
-<polygon fill="#191970" stroke="#191970" points="2194.8381,-810.5957 2204.922,-807.345 2195.0119,-803.5978 2194.8381,-810.5957"/>
+<path fill="none" stroke="#191970" d="M1917.4566,-805.2517C1790.9182,-798.7296 1525.478,-782.451 1437,-757 1335.0727,-727.6802 1227.7968,-654.5734 1191.1517,-628.1892"/>
+<polygon fill="#191970" stroke="#191970" points="1917.627,-808.7648 1927.7922,-805.7786 1917.9834,-801.7739 1917.627,-808.7648"/>
 </g>
 <!-- Node26 -->
 <g id="node6" class="node">
 <title>Node26</title>
 <g id="a_node6"><a xlink:href="ir_2transform_8h.html" target="_top" xlink:title="include/tvm/ir/transform.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="1261,-475 1261,-494 1405,-494 1405,-475 1261,-475"/>
-<text text-anchor="middle" x="1333" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/transform.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="320,-475 320,-494 464,-494 464,-475 320,-475"/>
+<text text-anchor="middle" x="392" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/transform.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node26 -->
-<g id="edge96" class="edge">
+<g id="edge95" class="edge">
 <title>Node20&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M2194.7685,-806.2991C2074.1203,-801.6443 1828.3707,-788.5196 1749,-757 1696.867,-736.297 1649,-736.0934 1649,-680 1649,-680 1649,-680 1649,-618.5 1649,-560.6234 1467.0596,-513.4271 1379.2875,-494.026"/>
-<polygon fill="#191970" stroke="#191970" points="2194.8667,-809.8052 2204.9916,-806.6848 2195.1307,-802.8102 2194.8667,-809.8052"/>
+<path fill="none" stroke="#191970" d="M1917.3037,-807.1405C1592.6699,-800.2743 228,-766.2795 228,-680 228,-680 228,-680 228,-618.5 228,-550.4794 313.6461,-510.7908 361.1529,-494.008"/>
+<polygon fill="#191970" stroke="#191970" points="1917.4369,-810.644 1927.5082,-807.3547 1917.5838,-803.6456 1917.4369,-810.644"/>
 </g>
 <!-- Node45 -->
 <g id="node11" class="node">
 <title>Node45</title>
 <g id="a_node11"><a xlink:href="builder_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/builder.h">
-<polygon fill="#ffffff" stroke="#000000" points="1630,-268.5 1630,-298.5 1782,-298.5 1782,-268.5 1630,-268.5"/>
-<text text-anchor="start" x="1638" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1706" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/builder.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="403,-268.5 403,-298.5 555,-298.5 555,-268.5 403,-268.5"/>
+<text text-anchor="start" x="411" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="479" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/builder.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node45 -->
-<g id="edge101" class="edge">
+<g id="edge100" class="edge">
 <title>Node20&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M2194.2824,-805.1576C2057.2398,-796.6085 1763,-768.0059 1763,-680 1763,-680 1763,-680 1763,-618.5 1763,-493.4293 1723.9099,-345.767 1710.4727,-298.7027"/>
-<polygon fill="#191970" stroke="#191970" points="2194.472,-808.6753 2204.6651,-805.7856 2194.8947,-801.6881 2194.472,-808.6753"/>
+<path fill="none" stroke="#191970" d="M1917.3995,-807.3379C1629.7349,-802.3245 528.5116,-781.6821 375,-757 255.5908,-737.801 114,-800.9428 114,-680 114,-680 114,-680 114,-417.5 114,-376.3553 122.7299,-359.2072 156,-335 194.3405,-307.1036 320.1121,-293.6874 402.9286,-287.7305"/>
+<polygon fill="#191970" stroke="#191970" points="1917.6289,-810.8423 1927.6882,-807.5166 1917.7505,-803.8434 1917.6289,-810.8423"/>
 </g>
 <!-- Node46 -->
 <g id="node12" class="node">
 <title>Node46</title>
 <g id="a_node12"><a xlink:href="measure__callback_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_callback.h">
-<polygon fill="#ffffff" stroke="#000000" points="2631,-67.5 2631,-97.5 2783,-97.5 2783,-67.5 2631,-67.5"/>
-<text text-anchor="start" x="2639" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2707" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_callback.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2148,-67.5 2148,-97.5 2300,-97.5 2300,-67.5 2148,-67.5"/>
+<text text-anchor="start" x="2156" y="-85.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2224" y="-74.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_callback.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node46 -->
-<g id="edge106" class="edge">
+<g id="edge105" class="edge">
 <title>Node20&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M2331.1242,-804.9882C2469.8386,-797.4885 2780.7524,-778.8024 2885,-757 2988.4554,-735.3633 3110,-785.6937 3110,-680 3110,-680 3110,-680 3110,-350.5 3110,-195.2663 2900.1089,-124.5104 2783.275,-97.1496"/>
-<polygon fill="#191970" stroke="#191970" points="2330.8374,-801.4985 2321.0397,-805.5301 2331.2131,-808.4884 2330.8374,-801.4985"/>
+<path fill="none" stroke="#191970" d="M2054.3193,-805.9315C2195.4166,-800.1402 2514.6207,-784.3893 2620,-757 2699.8857,-736.2368 2789,-762.5399 2789,-680 2789,-680 2789,-680 2789,-350.5 2789,-144.8258 2574.9133,-199.6556 2380,-134 2343.1057,-121.5723 2301.204,-107.7559 2270.1242,-97.5649"/>
+<polygon fill="#191970" stroke="#191970" points="2053.917,-802.4448 2044.0672,-806.3476 2054.2009,-809.4391 2053.917,-802.4448"/>
 </g>
 <!-- Node47 -->
 <g id="node13" class="node">
 <title>Node47</title>
 <g id="a_node13"><a xlink:href="task__scheduler_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/task_scheduler.h">
-<polygon fill="#ffffff" stroke="#000000" points="2806,-.5 2806,-30.5 2958,-30.5 2958,-.5 2806,-.5"/>
-<text text-anchor="start" x="2814" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2882" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/task_scheduler.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2403,-.5 2403,-30.5 2555,-30.5 2555,-.5 2403,-.5"/>
+<text text-anchor="start" x="2411" y="-18.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2479" y="-7.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/task_scheduler.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node47 -->
-<g id="edge114" class="edge">
+<g id="edge113" class="edge">
 <title>Node20&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M2331.1451,-807.4035C2489.5901,-804.2683 2878.795,-793.0322 3004,-757 3073.7445,-736.9285 3148,-752.5752 3148,-680 3148,-680 3148,-680 3148,-149.5 3148,-63.9211 3036.8274,-32.9888 2958.2249,-21.8138"/>
-<polygon fill="#191970" stroke="#191970" points="2330.9781,-803.9059 2321.0476,-807.5984 2331.1133,-810.9046 2330.9781,-803.9059"/>
+<path fill="none" stroke="#191970" d="M2054.4376,-805.7569C2222.9186,-798.7013 2648.4879,-778.9316 2709,-757 2917.3119,-681.5007 2865,-505.0716 2865,-283.5 2865,-283.5 2865,-283.5 2865,-149.5 2865,-89.8016 2813.4702,-91.4316 2759,-67 2723.3727,-51.02 2625.0651,-35.1479 2555.0194,-25.3376"/>
+<polygon fill="#191970" stroke="#191970" points="2054.0446,-802.2702 2044.1989,-806.1833 2054.3359,-809.2641 2054.0446,-802.2702"/>
 </g>
 <!-- Node48 -->
 <g id="node14" class="node">
 <title>Node48</title>
 <g id="a_node14"><a xlink:href="tune__context_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/tune_context.h">
-<polygon fill="#ffffff" stroke="#000000" points="1986,-134.5 1986,-164.5 2138,-164.5 2138,-134.5 1986,-134.5"/>
-<text text-anchor="start" x="1994" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2062" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/tune_context.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1700,-134.5 1700,-164.5 1852,-164.5 1852,-134.5 1700,-134.5"/>
+<text text-anchor="start" x="1708" y="-152.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1776" y="-141.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/tune_context.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node48 -->
-<g id="edge115" class="edge">
+<g id="edge114" class="edge">
 <title>Node20&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M2194.6859,-794.5323C2103.4041,-773.8698 1953,-731.9433 1953,-680 1953,-680 1953,-680 1953,-551.5 1953,-423.7802 1959.4401,-389.1084 2000,-268 2013.1986,-228.59 2038.252,-186.357 2052.1291,-164.5165"/>
-<polygon fill="#191970" stroke="#191970" points="2194.169,-798.003 2204.6914,-796.7658 2195.6941,-791.1712 2194.169,-798.003"/>
+<path fill="none" stroke="#191970" d="M1917.8601,-808.3959C1584.4553,-807.6703 137.6539,-801.9348 54,-757 17.177,-737.2205 0,-721.799 0,-680 0,-680 0,-680 0,-283.5 0,-196.8971 1360.5089,-159.2406 1699.9718,-151.1927"/>
+<polygon fill="#191970" stroke="#191970" points="1917.8538,-811.8958 1927.8612,-808.4169 1917.8686,-804.8958 1917.8538,-811.8958"/>
 </g>
 <!-- Node49 -->
 <g id="node15" class="node">
 <title>Node49</title>
 <g id="a_node15"><a xlink:href="database_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/database.h">
-<polygon fill="#ffffff" stroke="#000000" points="2210,-268.5 2210,-298.5 2362,-298.5 2362,-268.5 2210,-268.5"/>
-<text text-anchor="start" x="2218" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2286" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/database.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1927,-268.5 1927,-298.5 2079,-298.5 2079,-268.5 1927,-268.5"/>
+<text text-anchor="start" x="1935" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2003" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/database.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node49 -->
-<g id="edge103" class="edge">
+<g id="edge102" class="edge">
 <title>Node20&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M2331.2708,-802.1909C2404.6827,-791.2118 2511,-761.2804 2511,-680 2511,-680 2511,-680 2511,-618.5 2511,-548.151 2498.9688,-526.892 2459,-469 2444.8391,-448.4889 2433.7885,-450.4586 2416,-433 2391.5136,-408.9678 2326.2421,-331.6002 2298.6236,-298.6202"/>
-<polygon fill="#191970" stroke="#191970" points="2330.5588,-798.7566 2321.145,-803.6177 2331.5355,-805.6881 2330.5588,-798.7566"/>
+<path fill="none" stroke="#191970" d="M1993.8702,-783.7668C1996.2384,-775.3558 1998.5822,-765.8456 2000,-757 2024.9981,-601.032 2026.2825,-559.6856 2017,-402 2014.8087,-364.7756 2008.7775,-321.3134 2005.3897,-298.7885"/>
+<polygon fill="#191970" stroke="#191970" points="1990.5121,-782.7804 1991.0301,-793.3626 1997.2243,-784.7671 1990.5121,-782.7804"/>
 </g>
 <!-- Node50 -->
 <g id="node16" class="node">
 <title>Node50</title>
 <g id="a_node16"><a xlink:href="search__strategy_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/search_strategy.h">
-<polygon fill="#ffffff" stroke="#000000" points="2456,-201.5 2456,-231.5 2608,-231.5 2608,-201.5 2456,-201.5"/>
-<text text-anchor="start" x="2464" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2532" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/search_strategy.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2079,-201.5 2079,-231.5 2231,-231.5 2231,-201.5 2079,-201.5"/>
+<text text-anchor="start" x="2087" y="-219.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2155" y="-208.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/search_strategy.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node50 -->
-<g id="edge112" class="edge">
+<g id="edge111" class="edge">
 <title>Node20&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M2331.3919,-804.1167C2518.3319,-791.2274 3024,-749.9016 3024,-680 3024,-680 3024,-680 3024,-417.5 3024,-316.8844 2948.6222,-302.2074 2854,-268 2773.0738,-238.7439 2674.7298,-226.0737 2608.1814,-220.6075"/>
-<polygon fill="#191970" stroke="#191970" points="2330.8749,-800.6437 2321.1367,-804.8172 2331.352,-807.6274 2330.8749,-800.6437"/>
+<path fill="none" stroke="#191970" d="M2054.5526,-802.901C2178.922,-790.8403 2429,-757.014 2429,-680 2429,-680 2429,-680 2429,-417.5 2429,-357.9137 2258.4022,-267.2155 2186.4313,-231.6034"/>
+<polygon fill="#191970" stroke="#191970" points="2053.9948,-799.4381 2044.3701,-803.867 2054.656,-806.4068 2053.9948,-799.4381"/>
 </g>
 <!-- Node51 -->
 <g id="node17" class="node">
 <title>Node51</title>
 <g id="a_node17"><a xlink:href="extracted__task_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/extracted_task.h">
-<polygon fill="#ffffff" stroke="#000000" points="0,-268.5 0,-298.5 152,-298.5 152,-268.5 0,-268.5"/>
-<text text-anchor="start" x="8" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="76" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/extracted_task.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="28,-268.5 28,-298.5 180,-298.5 180,-268.5 28,-268.5"/>
+<text text-anchor="start" x="36" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="104" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/extracted_task.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node51 -->
-<g id="edge104" class="edge">
+<g id="edge103" class="edge">
 <title>Node20&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M2194.7874,-808.2138C1821.0047,-806.2094 38,-790.8388 38,-680 38,-680 38,-680 38,-417.5 38,-373.0065 57.6309,-323.2478 68.6732,-298.778"/>
-<polygon fill="#191970" stroke="#191970" points="2194.8965,-811.7144 2204.9146,-808.2667 2194.9331,-804.7145 2194.8965,-811.7144"/>
+<path fill="none" stroke="#191970" d="M1917.5994,-807.9374C1591.5888,-805.0871 207.7238,-791.0213 123,-757 75.6975,-738.0054 38,-730.9737 38,-680 38,-680 38,-680 38,-417.5 38,-370.3026 71.3585,-322.4861 90.6439,-298.7859"/>
+<polygon fill="#191970" stroke="#191970" points="1917.8124,-811.4392 1927.8424,-808.0262 1917.8732,-804.4395 1917.8124,-811.4392"/>
 </g>
 <!-- Node52 -->
 <g id="node18" class="node">
 <title>Node52</title>
 <g id="a_node18"><a xlink:href="profiler_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/profiler.h">
-<polygon fill="#ffffff" stroke="#000000" points="1800,-268.5 1800,-298.5 1952,-298.5 1952,-268.5 1800,-268.5"/>
-<text text-anchor="start" x="1808" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1876" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/profiler.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1270,-268.5 1270,-298.5 1422,-298.5 1422,-268.5 1270,-268.5"/>
+<text text-anchor="start" x="1278" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1346" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/profiler.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node52 -->
-<g id="edge108" class="edge">
+<g id="edge107" class="edge">
 <title>Node20&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M2194.6633,-804.5943C2112.0965,-798.7005 1979.2336,-785.0939 1939,-757 1906.8626,-734.5594 1896,-719.1969 1896,-680 1896,-680 1896,-680 1896,-618.5 1896,-521.7312 1808.3263,-526.1189 1782,-433 1766.4859,-378.125 1823.1377,-324.0702 1855.2791,-298.6119"/>
-<polygon fill="#191970" stroke="#191970" points="2194.5035,-808.0916 2204.7218,-805.2921 2194.988,-801.1084 2194.5035,-808.0916"/>
+<path fill="none" stroke="#191970" d="M1917.823,-803.7488C1789.3072,-792.9097 1524,-760.7539 1524,-680 1524,-680 1524,-680 1524,-618.5 1524,-518.7688 1515.1401,-488.2107 1465,-402 1439.4947,-358.1463 1393.6856,-319.1622 1366.956,-298.6874"/>
+<polygon fill="#191970" stroke="#191970" points="1917.7017,-807.2506 1927.9552,-804.5831 1918.2761,-800.2742 1917.7017,-807.2506"/>
 </g>
 <!-- Node53 -->
 <g id="node19" class="node">
 <title>Node53</title>
 <g id="a_node19"><a xlink:href="space__generator_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/space_generator.h">
-<polygon fill="#ffffff" stroke="#000000" points="1044,-268.5 1044,-298.5 1196,-298.5 1196,-268.5 1044,-268.5"/>
-<text text-anchor="start" x="1052" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1120" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/space_generator.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1617,-268.5 1617,-298.5 1769,-298.5 1769,-268.5 1617,-268.5"/>
+<text text-anchor="start" x="1625" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1693" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/space_generator.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node53 -->
-<g id="edge113" class="edge">
+<g id="edge112" class="edge">
 <title>Node20&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M2194.5474,-807.5793C1929.6997,-803.802 980.4156,-788.1979 849,-757 769.0854,-738.0284 681,-762.1357 681,-680 681,-680 681,-680 681,-618.5 681,-552.0957 734.6342,-554.8938 772,-500 820.1225,-429.3035 801.256,-384.4929 871,-335 898.4054,-315.5521 981.2846,-301.0211 1043.9608,-292.4372"/>
-<polygon fill="#191970" stroke="#191970" points="2194.7436,-811.0823 2204.7922,-807.7243 2194.8428,-804.083 2194.7436,-811.0823"/>
+<path fill="none" stroke="#191970" d="M1949.7584,-787.8593C1916.3792,-765.8519 1872,-727.635 1872,-680 1872,-680 1872,-680 1872,-551.5 1872,-459.5534 1794.8181,-466.2039 1729,-402 1712.1142,-385.5284 1699.461,-387.6087 1690,-366 1680.4003,-344.0744 1684.6018,-315.6468 1688.6835,-298.5307"/>
+<polygon fill="#191970" stroke="#191970" points="1948.0891,-790.9458 1958.4038,-793.366 1951.8497,-785.0417 1948.0891,-790.9458"/>
+</g>
+<!-- Node146 -->
+<g id="node26" class="node">
+<title>Node146</title>
+<g id="a_node26"><a xlink:href="meta__schedule_2cost__model_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/cost_model.h">
+<polygon fill="#ffffff" stroke="#000000" points="2363,-268.5 2363,-298.5 2515,-298.5 2515,-268.5 2363,-268.5"/>
+<text text-anchor="start" x="2371" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2439" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/cost_model.h</text>
+</a>
+</g>
+</g>
+<!-- Node20&#45;&gt;Node146 -->
+<g id="edge101" class="edge">
+<title>Node20&#45;&gt;Node146</title>
+<path fill="none" stroke="#191970" d="M2054.3211,-804.8881C2174.455,-797.9439 2413.6543,-781.2415 2443,-757 2470.6362,-734.1707 2467,-715.846 2467,-680 2467,-680 2467,-680 2467,-417.5 2467,-373.5323 2452.3775,-323.1326 2444.2666,-298.5584"/>
+<polygon fill="#191970" stroke="#191970" points="2053.9126,-801.4056 2044.1285,-805.4703 2054.3119,-808.3942 2053.9126,-801.4056"/>
 </g>
 <!-- Node147 -->
 <g id="node27" class="node">
 <title>Node147</title>
-<g id="a_node27"><a xlink:href="meta__schedule_2cost__model_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/cost_model.h">
-<polygon fill="#ffffff" stroke="#000000" points="2664,-268.5 2664,-298.5 2816,-298.5 2816,-268.5 2664,-268.5"/>
-<text text-anchor="start" x="2672" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2740" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/cost_model.h</text>
+<g id="a_node27"><a xlink:href="measure__candidate_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_candidate.h">
+<polygon fill="#ffffff" stroke="#000000" points="2495,-335.5 2495,-365.5 2647,-365.5 2647,-335.5 2495,-335.5"/>
+<text text-anchor="start" x="2503" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2571" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_candidate.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node147 -->
-<g id="edge102" class="edge">
+<g id="edge106" class="edge">
 <title>Node20&#45;&gt;Node147</title>
-<path fill="none" stroke="#191970" d="M2331.2686,-805.6984C2471.3038,-798.0865 2778,-771.0249 2778,-680 2778,-680 2778,-680 2778,-417.5 2778,-373.0065 2758.3691,-323.2478 2747.3268,-298.778"/>
-<polygon fill="#191970" stroke="#191970" points="2330.8785,-802.214 2321.076,-806.2341 2331.2459,-809.2044 2330.8785,-802.214"/>
+<path fill="none" stroke="#191970" d="M2054.076,-804.4656C2181.1084,-796.4937 2444.7432,-777.6926 2481,-757 2517.9798,-735.8948 2538,-722.5786 2538,-680 2538,-680 2538,-680 2538,-484.5 2538,-440.4155 2555.0479,-390.4779 2564.6373,-365.8751"/>
+<polygon fill="#191970" stroke="#191970" points="2053.8263,-800.9743 2044.0632,-805.0893 2054.2615,-807.9608 2053.8263,-800.9743"/>
 </g>
 <!-- Node148 -->
 <g id="node28" class="node">
 <title>Node148</title>
-<g id="a_node28"><a xlink:href="measure__candidate_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/measure_candidate.h">
-<polygon fill="#ffffff" stroke="#000000" points="2522,-335.5 2522,-365.5 2674,-365.5 2674,-335.5 2522,-335.5"/>
-<text text-anchor="start" x="2530" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2598" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/measure_candidate.h</text>
+<g id="a_node28"><a xlink:href="feature__extractor_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/feature_extractor.h">
+<polygon fill="#ffffff" stroke="#000000" points="2609,-268.5 2609,-298.5 2761,-298.5 2761,-268.5 2609,-268.5"/>
+<text text-anchor="start" x="2617" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2685" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/feature_extractor.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node148 -->
-<g id="edge107" class="edge">
+<g id="edge104" class="edge">
 <title>Node20&#45;&gt;Node148</title>
-<path fill="none" stroke="#191970" d="M2331.0802,-802.5645C2428.2695,-793.5072 2597.4887,-775.3464 2619,-757 2646.2738,-733.7389 2643,-715.846 2643,-680 2643,-680 2643,-680 2643,-484.5 2643,-439.3452 2619.7529,-389.8758 2606.6765,-365.6211"/>
-<polygon fill="#191970" stroke="#191970" points="2330.6779,-799.0867 2321.0421,-803.4915 2331.3217,-806.057 2330.6779,-799.0867"/>
+<path fill="none" stroke="#191970" d="M2054.302,-804.2132C2177.0325,-796.12 2430.077,-777.548 2516,-757 2607.428,-735.1356 2713,-774.006 2713,-680 2713,-680 2713,-680 2713,-417.5 2713,-373.5323 2698.3775,-323.1326 2690.2666,-298.5584"/>
+<polygon fill="#191970" stroke="#191970" points="2054.0166,-800.7243 2044.2668,-804.8707 2054.4743,-807.7093 2054.0166,-800.7243"/>
 </g>
 <!-- Node149 -->
 <g id="node29" class="node">
 <title>Node149</title>
-<g id="a_node29"><a xlink:href="feature__extractor_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/feature_extractor.h">
-<polygon fill="#ffffff" stroke="#000000" points="2418,-268.5 2418,-298.5 2570,-298.5 2570,-268.5 2418,-268.5"/>
-<text text-anchor="start" x="2426" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2494" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/feature_extractor.h</text>
+<g id="a_node29"><a xlink:href="runner_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/runner.h">
+<polygon fill="#ffffff" stroke="#000000" points="2173,-335.5 2173,-365.5 2325,-365.5 2325,-335.5 2173,-335.5"/>
+<text text-anchor="start" x="2181" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2249" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/runner.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node149 -->
-<g id="edge105" class="edge">
+<g id="edge108" class="edge">
 <title>Node20&#45;&gt;Node149</title>
-<path fill="none" stroke="#191970" d="M2331.3814,-800.5078C2378.7938,-793.2632 2442.5795,-780.0698 2495,-757 2549.621,-732.9618 2605,-739.6765 2605,-680 2605,-680 2605,-680 2605,-484.5 2605,-446.8735 2609.3508,-431.5041 2586,-402 2563.5499,-373.6339 2535.4501,-394.3661 2513,-366 2497.4936,-346.4074 2494.2031,-316.4555 2493.7422,-298.5426"/>
-<polygon fill="#191970" stroke="#191970" points="2330.6017,-797.0849 2321.2201,-802.008 2331.6241,-804.0098 2330.6017,-797.0849"/>
-</g>
-<!-- Node150 -->
-<g id="node30" class="node">
-<title>Node150</title>
-<g id="a_node30"><a xlink:href="runner_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/runner.h">
-<polygon fill="#ffffff" stroke="#000000" points="2806,-335.5 2806,-365.5 2958,-365.5 2958,-335.5 2806,-335.5"/>
-<text text-anchor="start" x="2814" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2882" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/runner.h</text>
-</a>
-</g>
-</g>
-<!-- Node20&#45;&gt;Node150 -->
-<g id="edge109" class="edge">
-<title>Node20&#45;&gt;Node150</title>
-<path fill="none" stroke="#191970" d="M2331.2541,-806.3149C2426.5156,-802.0932 2593.2614,-790.0869 2643,-757 2793.6405,-656.7918 2862.3066,-426.6533 2878.2667,-365.598"/>
-<polygon fill="#191970" stroke="#191970" points="2330.9767,-802.8235 2321.1348,-806.746 2331.2747,-809.8171 2330.9767,-802.8235"/>
+<path fill="none" stroke="#191970" d="M2054.1037,-794.757C2145.9085,-774.2413 2298,-732.353 2298,-680 2298,-680 2298,-680 2298,-484.5 2298,-438.9223 2272.6865,-389.6379 2258.4477,-365.5208"/>
+<polygon fill="#191970" stroke="#191970" points="2053.3309,-791.3432 2044.3188,-796.9139 2054.8378,-798.1791 2053.3309,-791.3432"/>
 </g>
 <!-- Node160 -->
 <g id="node38" class="node">
 <title>Node160</title>
 <g id="a_node38"><a xlink:href="ir_2function_8h.html" target="_top" xlink:title="Function nodes. ">
-<polygon fill="#ffffff" stroke="#000000" points="2291,-670.5 2291,-689.5 2427,-689.5 2427,-670.5 2291,-670.5"/>
-<text text-anchor="middle" x="2359" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/function.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2052,-670.5 2052,-689.5 2188,-689.5 2188,-670.5 2052,-670.5"/>
+<text text-anchor="middle" x="2120" y="-677.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/function.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node160 -->
-<g id="edge82" class="edge">
+<g id="edge81" class="edge">
 <title>Node20&#45;&gt;Node160</title>
-<path fill="none" stroke="#191970" d="M2280.5165,-785.0535C2301.788,-756.5807 2336.6846,-709.8701 2351.7266,-689.7358"/>
-<polygon fill="#191970" stroke="#191970" points="2277.5706,-783.1488 2274.3894,-793.2548 2283.1784,-787.3383 2277.5706,-783.1488"/>
+<path fill="none" stroke="#191970" d="M2009.3079,-786.1488C2038.8772,-757.7932 2088.5799,-710.1305 2109.8475,-689.7358"/>
+<polygon fill="#191970" stroke="#191970" points="2006.6929,-783.8071 2001.8978,-793.2548 2011.5379,-788.8595 2006.6929,-783.8071"/>
 </g>
 <!-- Node168 -->
 <g id="node43" class="node">
 <title>Node168</title>
 <g id="a_node43"><a xlink:href="ir_2type_8h.html" target="_top" xlink:title="IR/AST nodes for the unified type system in TVM. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1067,-732 1067,-751 1185,-751 1185,-732 1067,-732"/>
-<text text-anchor="middle" x="1126" y="-739" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1126,-732 1126,-751 1244,-751 1244,-732 1126,-732"/>
+<text text-anchor="middle" x="1185" y="-739" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node168 -->
-<g id="edge97" class="edge">
+<g id="edge96" class="edge">
 <title>Node20&#45;&gt;Node168</title>
-<path fill="none" stroke="#191970" d="M2194.5898,-804.4688C1988.8542,-792.3454 1378.101,-756.3556 1185.1304,-744.9844"/>
-<polygon fill="#191970" stroke="#191970" points="2194.4705,-807.9677 2204.6591,-805.0621 2194.8823,-800.9799 2194.4705,-807.9677"/>
+<path fill="none" stroke="#191970" d="M1917.6607,-806.1133C1789.8572,-801.091 1506.1414,-787.301 1269,-757 1256.7908,-755.4399 1243.6685,-753.2622 1231.5088,-751.0312"/>
+<polygon fill="#191970" stroke="#191970" points="1917.5688,-809.6122 1927.6968,-806.5022 1917.8399,-802.6175 1917.5688,-809.6122"/>
 </g>
 <!-- Node159 -->
 <g id="node44" class="node">
 <title>Node159</title>
 <g id="a_node44"><a xlink:href="schedule__rule_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/schedule_rule.h">
-<polygon fill="#ffffff" stroke="#000000" points="929,-335.5 929,-365.5 1081,-365.5 1081,-335.5 929,-335.5"/>
-<text text-anchor="start" x="937" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="1005" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/schedule_rule.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1699,-335.5 1699,-365.5 1851,-365.5 1851,-335.5 1699,-335.5"/>
+<text text-anchor="start" x="1707" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="1775" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/schedule_rule.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node159 -->
-<g id="edge110" class="edge">
+<g id="edge109" class="edge">
 <title>Node20&#45;&gt;Node159</title>
-<path fill="none" stroke="#191970" d="M2194.662,-807.4252C1946.746,-803.2953 1103.1035,-787.1482 986,-757 912.278,-738.0203 833,-756.126 833,-680 833,-680 833,-680 833,-618.5 833,-506.2698 941.1535,-403.5114 985.9358,-365.7735"/>
-<polygon fill="#191970" stroke="#191970" points="2194.9156,-810.9298 2204.9722,-807.5957 2195.0315,-803.9307 2194.9156,-810.9298"/>
+<path fill="none" stroke="#191970" d="M1974.3,-784.282C1963.1336,-758.9072 1948,-717.5842 1948,-680 1948,-680 1948,-680 1948,-551.5 1948,-504.3076 1837.0636,-403.8054 1792.8624,-365.6345"/>
+<polygon fill="#191970" stroke="#191970" points="1971.1657,-785.8448 1978.4924,-793.4979 1977.5374,-782.9462 1971.1657,-785.8448"/>
 </g>
 <!-- Node203 -->
 <g id="node45" class="node">
 <title>Node203</title>
 <g id="a_node45"><a xlink:href="structural__equal_8h.html" target="_top" xlink:title="Structural equality comparison. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="3176.5,-726.5 3176.5,-756.5 3327.5,-756.5 3327.5,-726.5 3176.5,-726.5"/>
-<text text-anchor="start" x="3184.5" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/structural</text>
-<text text-anchor="middle" x="3252" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2864.5,-726.5 2864.5,-756.5 3015.5,-756.5 3015.5,-726.5 2864.5,-726.5"/>
+<text text-anchor="start" x="2872.5" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/node/structural</text>
+<text text-anchor="middle" x="2940" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_equal.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node203 -->
-<g id="edge116" class="edge">
+<g id="edge115" class="edge">
 <title>Node20&#45;&gt;Node203</title>
-<path fill="none" stroke="#191970" d="M2331.0499,-807.2319C2481.392,-803.8548 2853.0241,-792.341 3162,-757 3166.7257,-756.4595 3171.5798,-755.8328 3176.4672,-755.1475"/>
-<polygon fill="#191970" stroke="#191970" points="2330.9612,-803.7329 2321.0405,-807.452 2331.1151,-810.7313 2330.9612,-803.7329"/>
+<path fill="none" stroke="#191970" d="M2054.2545,-807.0056C2200.5453,-803.2379 2554.9891,-791.0893 2850,-757 2854.7251,-756.454 2859.5787,-755.8231 2864.4658,-755.1347"/>
+<polygon fill="#191970" stroke="#191970" points="2053.9933,-803.5109 2044.0849,-807.2625 2054.1702,-810.5087 2053.9933,-803.5109"/>
 </g>
 <!-- Node213 -->
 <g id="node46" class="node">
 <title>Node213</title>
 <g id="a_node46"><a xlink:href="papi_8h.html" target="_top" xlink:title="include/tvm/runtime\l/contrib/papi.h">
-<polygon fill="#ffffff" stroke="#000000" points="3346,-726.5 3346,-756.5 3462,-756.5 3462,-726.5 3346,-726.5"/>
-<text text-anchor="start" x="3354" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="3404" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/contrib/papi.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="3034,-726.5 3034,-756.5 3150,-756.5 3150,-726.5 3034,-726.5"/>
+<text text-anchor="start" x="3042" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="3092" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/contrib/papi.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node213 -->
-<g id="edge117" class="edge">
+<g id="edge116" class="edge">
 <title>Node20&#45;&gt;Node213</title>
-<path fill="none" stroke="#191970" d="M2331.1936,-806.8316C2535.6432,-801.5763 3141.4109,-784.0901 3337,-757 3339.8383,-756.6069 3342.7333,-756.1512 3345.6494,-755.6483"/>
-<polygon fill="#191970" stroke="#191970" points="2331.0935,-803.333 2321.1862,-807.0873 2331.2723,-810.3307 2331.0935,-803.333"/>
+<path fill="none" stroke="#191970" d="M2054.3525,-806.7002C2254.4202,-801.1803 2836.5444,-783.2568 3025,-757 3027.838,-756.6046 3030.7327,-756.1471 3033.6486,-755.6426"/>
+<polygon fill="#191970" stroke="#191970" points="2054.1676,-803.2038 2044.2674,-806.9768 2054.3596,-810.2012 2054.1676,-803.2038"/>
 </g>
 <!-- Node214 -->
 <g id="node47" class="node">
 <title>Node214</title>
 <g id="a_node47"><a xlink:href="packed__func_8h.html" target="_top" xlink:title="Type&#45;erased function used across TVM API. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2215,-402.5 2215,-432.5 2331,-432.5 2331,-402.5 2215,-402.5"/>
-<text text-anchor="start" x="2223" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
-<text text-anchor="middle" x="2273" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/packed_func.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1892,-402.5 1892,-432.5 2008,-432.5 2008,-402.5 1892,-402.5"/>
+<text text-anchor="start" x="1900" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/runtime</text>
+<text text-anchor="middle" x="1950" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/packed_func.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node214 -->
-<g id="edge118" class="edge">
+<g id="edge117" class="edge">
 <title>Node20&#45;&gt;Node214</title>
-<path fill="none" stroke="#191970" d="M2263,-783.3849C2263,-757.4823 2263,-715.9175 2263,-680 2263,-680 2263,-680 2263,-551.5 2263,-508.3421 2268.2223,-457.5818 2271.1191,-432.7453"/>
-<polygon fill="#191970" stroke="#191970" points="2259.5001,-783.4649 2263,-793.4649 2266.5001,-783.465 2259.5001,-783.4649"/>
+<path fill="none" stroke="#191970" d="M1986,-783.3849C1986,-757.4823 1986,-715.9175 1986,-680 1986,-680 1986,-680 1986,-551.5 1986,-507.1766 1967.4023,-457.3435 1956.9412,-432.8184"/>
+<polygon fill="#191970" stroke="#191970" points="1982.5001,-783.4649 1986,-793.4649 1989.5001,-783.465 1982.5001,-783.4649"/>
 </g>
 <!-- Node193 -->
 <g id="node48" class="node">
 <title>Node193</title>
 <g id="a_node48"><a xlink:href="buffer_8h.html" target="_top" xlink:title="Symbolic n&#45;dimensional array, to represent a memory buffer. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1993,-609 1993,-628 2121,-628 2121,-609 1993,-609"/>
-<text text-anchor="middle" x="2057" y="-616" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/buffer.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1648,-609 1648,-628 1776,-628 1776,-609 1648,-609"/>
+<text text-anchor="middle" x="1712" y="-616" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/buffer.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node193 -->
-<g id="edge134" class="edge">
+<g id="edge133" class="edge">
 <title>Node20&#45;&gt;Node193</title>
-<path fill="none" stroke="#191970" d="M2195.3528,-790.9178C2174.3229,-783.0599 2152.1751,-772.0701 2135,-757 2092.4766,-719.6884 2068.0395,-653.3204 2059.9906,-628.3578"/>
-<polygon fill="#191970" stroke="#191970" points="2194.3199,-794.2651 2204.9146,-794.3112 2196.6611,-787.6682 2194.3199,-794.2651"/>
+<path fill="none" stroke="#191970" d="M1927.0256,-790.0677C1904.5002,-781.6945 1879.1641,-770.5889 1858,-757 1797.7313,-718.303 1740.3641,-652.7684 1719.8988,-628.1928"/>
+<polygon fill="#191970" stroke="#191970" points="1925.8926,-793.3797 1936.4869,-793.4882 1928.2726,-786.7967 1925.8926,-793.3797"/>
 </g>
 <!-- Node194 -->
 <g id="node49" class="node">
 <title>Node194</title>
 <g id="a_node49"><a xlink:href="tir_2expr_8h.html" target="_top" xlink:title="TIR expressions. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2037.5,-542 2037.5,-561 2158.5,-561 2158.5,-542 2037.5,-542"/>
-<text text-anchor="middle" x="2098" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/expr.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1559.5,-542 1559.5,-561 1680.5,-561 1680.5,-542 1559.5,-542"/>
+<text text-anchor="middle" x="1620" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node194 -->
-<g id="edge140" class="edge">
+<g id="edge139" class="edge">
 <title>Node20&#45;&gt;Node194</title>
-<path fill="none" stroke="#191970" d="M2247.6006,-784.5142C2212.5006,-729.8434 2127.7798,-597.8843 2104.1888,-561.1395"/>
-<polygon fill="#191970" stroke="#191970" points="2244.8247,-786.6691 2253.1726,-793.1931 2250.7152,-782.8872 2244.8247,-786.6691"/>
+<path fill="none" stroke="#191970" d="M1917.8082,-793.8792C1884.8807,-785.4408 1845.3733,-773.2411 1812,-757 1727.1701,-715.7176 1691.4525,-712.4161 1639,-634 1623.6371,-611.0325 1620.5916,-577.3057 1620.0577,-561.0089"/>
+<polygon fill="#191970" stroke="#191970" points="1917.2515,-797.3477 1927.8018,-796.3776 1918.9492,-790.5567 1917.2515,-797.3477"/>
 </g>
 <!-- Node199 -->
 <g id="node50" class="node">
 <title>Node199</title>
 <g id="a_node50"><a xlink:href="index__map_8h.html" target="_top" xlink:title="Defines a remapping of buffer indices. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="3480,-726.5 3480,-756.5 3598,-756.5 3598,-726.5 3480,-726.5"/>
-<text text-anchor="start" x="3488" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/index</text>
-<text text-anchor="middle" x="3539" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_map.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="3168,-726.5 3168,-756.5 3286,-756.5 3286,-726.5 3168,-726.5"/>
+<text text-anchor="start" x="3176" y="-744.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/index</text>
+<text text-anchor="middle" x="3227" y="-733.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_map.h</text>
 </a>
 </g>
 </g>
 <!-- Node20&#45;&gt;Node199 -->
-<g id="edge141" class="edge">
+<g id="edge140" class="edge">
 <title>Node20&#45;&gt;Node199</title>
-<path fill="none" stroke="#191970" d="M2331.2034,-807.2217C2552.3567,-802.8205 3248.1541,-786.896 3471,-757 3473.8802,-756.6136 3476.818,-756.1634 3479.7773,-755.6648"/>
-<polygon fill="#191970" stroke="#191970" points="2330.9637,-803.7256 2321.0348,-807.4226 2331.102,-810.7243 2330.9637,-803.7256"/>
+<path fill="none" stroke="#191970" d="M2054.4123,-807.1169C2271.5191,-802.4725 2943.3457,-786.0706 3159,-757 3161.88,-756.6118 3164.8176,-756.1601 3167.7767,-755.6603"/>
+<polygon fill="#191970" stroke="#191970" points="2054.0388,-803.624 2044.1154,-807.3357 2054.1875,-810.6224 2054.0388,-803.624"/>
 </g>
 <!-- Node21&#45;&gt;Node22 -->
 <g id="edge2" class="edge">
 <title>Node21&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M983.5622,-671.8778C1080.0316,-660.1063 1257.6928,-638.4274 1354.2612,-626.6437"/>
-<polygon fill="#191970" stroke="#191970" points="982.8506,-668.4386 973.3482,-673.1242 983.6985,-675.3871 982.8506,-668.4386"/>
+<path fill="none" stroke="#191970" d="M1329.4048,-667.2507C1293.1647,-655.5818 1240.3474,-638.5752 1207.6342,-628.0419"/>
+<polygon fill="#191970" stroke="#191970" points="1328.5648,-670.6572 1339.1563,-670.3906 1330.7103,-663.9941 1328.5648,-670.6572"/>
 </g>
 <!-- Node81 -->
 <g id="node33" class="node">
 <title>Node81</title>
 <g id="a_node33"><a xlink:href="relay_2adt_8h.html" target="_top" xlink:title="Algebraic data types for Relay. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="180,-475 180,-494 310,-494 310,-475 180,-475"/>
-<text text-anchor="middle" x="245" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/adt.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1176,-475 1176,-494 1306,-494 1306,-475 1176,-475"/>
+<text text-anchor="middle" x="1241" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/adt.h</text>
 </a>
 </g>
 </g>
 <!-- Node21&#45;&gt;Node81 -->
-<g id="edge81" class="edge">
+<g id="edge80" class="edge">
 <title>Node21&#45;&gt;Node81</title>
-<path fill="none" stroke="#191970" d="M850.9772,-671.6409C718.8685,-654.1876 426.9613,-611.7634 337,-567 300.3142,-548.7457 267.1582,-511.8061 252.6862,-494.2246"/>
-<polygon fill="#191970" stroke="#191970" points="850.5998,-675.1213 860.9706,-672.9537 851.5117,-668.1809 850.5998,-675.1213"/>
+<path fill="none" stroke="#191970" d="M1357.0175,-661.6987C1330.5028,-621.2015 1267.6146,-525.1496 1247.3859,-494.2534"/>
+<polygon fill="#191970" stroke="#191970" points="1354.2465,-663.856 1362.6525,-670.3051 1360.1029,-660.0216 1354.2465,-663.856"/>
 </g>
 <!-- Node23 -->
 <g id="node4" class="node">
 <title>Node23</title>
 <g id="a_node4"><a xlink:href="driver__api_8h.html" target="_top" xlink:title="Compiler driver APIs to drive the compilation. ">
-<polygon fill="#ffffff" stroke="#000000" points="1415,-268.5 1415,-298.5 1521,-298.5 1521,-268.5 1415,-268.5"/>
-<text text-anchor="start" x="1423" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/driver</text>
-<text text-anchor="middle" x="1468" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/driver_api.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="573,-268.5 573,-298.5 679,-298.5 679,-268.5 573,-268.5"/>
+<text text-anchor="start" x="581" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/driver</text>
+<text text-anchor="middle" x="626" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/driver_api.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node23 -->
 <g id="edge3" class="edge">
 <title>Node22&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1430.4187,-599.6573C1434.7362,-590.1136 1439.4514,-578.201 1442,-567 1464.9185,-466.2758 1441.1152,-437.2206 1456,-335 1457.7991,-322.6448 1461.0734,-308.8824 1463.7477,-298.6948"/>
-<polygon fill="#191970" stroke="#191970" points="1427.1397,-598.4032 1426.0111,-608.9378 1433.4628,-601.4063 1427.1397,-598.4032"/>
+<path fill="none" stroke="#191970" d="M1101.0488,-616.5953C900.323,-611.261 371.1925,-594.7237 297,-567 240.7491,-545.9806 190,-544.5498 190,-484.5 190,-484.5 190,-484.5 190,-417.5 190,-379.8735 180.7806,-359.8881 209,-335 268.4692,-282.5513 485.8904,-312.6511 564,-299 566.8417,-298.5034 569.7457,-297.9477 572.67,-297.3502"/>
+<polygon fill="#191970" stroke="#191970" points="1101.1281,-620.0985 1111.2169,-616.8634 1101.3126,-613.1009 1101.1281,-620.0985"/>
 </g>
 <!-- Node24 -->
 <g id="node5" class="node">
 <title>Node24</title>
 <g id="a_node5"><a xlink:href="diagnostic_8h.html" target="_top" xlink:title="A new diagnostic interface for TVM error reporting. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1109,-542 1109,-561 1255,-561 1255,-542 1109,-542"/>
-<text text-anchor="middle" x="1182" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/diagnostic.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="408,-542 408,-561 554,-561 554,-542 408,-542"/>
+<text text-anchor="middle" x="481" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/diagnostic.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node24 -->
 <g id="edge4" class="edge">
 <title>Node22&#45;&gt;Node24</title>
-<path fill="none" stroke="#191970" d="M1377.2458,-606.2342C1330.8857,-593.2378 1258.6485,-572.9872 1216.2588,-561.1039"/>
-<polygon fill="#191970" stroke="#191970" points="1376.4374,-609.6424 1387.0109,-608.9717 1378.3269,-602.9022 1376.4374,-609.6424"/>
+<path fill="none" stroke="#191970" d="M1101.256,-614.2952C985.3905,-607.4336 759.0298,-592.0269 568,-567 555.6783,-565.3857 542.4441,-563.2412 530.1084,-561.0637"/>
+<polygon fill="#191970" stroke="#191970" points="1101.2529,-617.801 1111.4409,-614.8932 1101.6633,-610.813 1101.2529,-617.801"/>
 </g>
 <!-- Node22&#45;&gt;Node26 -->
-<g id="edge37" class="edge">
+<g id="edge35" class="edge">
 <title>Node22&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1419.9351,-598.7301C1418.1603,-581.0245 1413.4527,-554.9977 1401,-536 1388.8222,-517.4217 1367.5693,-502.9337 1352.0571,-494.1211"/>
-<polygon fill="#191970" stroke="#191970" points="1416.4514,-599.0759 1420.7172,-608.774 1423.4303,-598.5325 1416.4514,-599.0759"/>
+<path fill="none" stroke="#191970" d="M1101.1476,-616.1001C909.4704,-609.7583 423.8138,-591.3558 399,-567 379.4063,-547.768 385.1535,-511.3097 389.3352,-494.0355"/>
+<polygon fill="#191970" stroke="#191970" points="1101.3147,-619.6074 1111.4243,-616.4379 1101.5447,-612.6112 1101.3147,-619.6074"/>
 </g>
 <!-- Node27 -->
 <g id="node7" class="node">
 <title>Node27</title>
 <g id="a_node7"><a xlink:href="parser_8h.html" target="_top" xlink:title="A parser for TVM IR. ">
-<polygon fill="#ffffff" stroke="#000000" points="1099,-335.5 1099,-365.5 1207,-365.5 1207,-335.5 1099,-335.5"/>
-<text text-anchor="start" x="1107" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/parser</text>
-<text text-anchor="middle" x="1153" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/parser.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="354,-335.5 354,-365.5 462,-365.5 462,-335.5 354,-335.5"/>
+<text text-anchor="start" x="362" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/parser</text>
+<text text-anchor="middle" x="408" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/parser.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node27 -->
-<g id="edge62" class="edge">
+<g id="edge60" class="edge">
 <title>Node22&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M1344.2825,-614.3036C1219.9767,-606.8742 985.3172,-589.9944 955,-567 919.004,-539.6985 904.6552,-507.6795 928,-469 948.8167,-434.5092 1054.3013,-388.9715 1113.1565,-365.666"/>
-<polygon fill="#191970" stroke="#191970" points="1344.3083,-617.8112 1354.4975,-614.9077 1344.7216,-610.8234 1344.3083,-617.8112"/>
+<path fill="none" stroke="#191970" d="M1101.1581,-616.4867C914.323,-611.1721 446.7965,-595.2022 382,-567 332.6836,-545.5354 292.9124,-519.6525 311,-469 326.5571,-425.4339 365.7779,-386.3464 389.3075,-365.7759"/>
+<polygon fill="#191970" stroke="#191970" points="1101.0867,-619.986 1111.1814,-616.7693 1101.284,-612.9888 1101.0867,-619.986"/>
 </g>
 <!-- Node31 -->
 <g id="node10" class="node">
 <title>Node31</title>
 <g id="a_node10"><a xlink:href="target_8h.html" target="_top" xlink:title="Compilation target object. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1263.5,-335.5 1263.5,-365.5 1370.5,-365.5 1370.5,-335.5 1263.5,-335.5"/>
-<text text-anchor="start" x="1271.5" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="1317" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="828.5,-335.5 828.5,-365.5 935.5,-365.5 935.5,-335.5 828.5,-335.5"/>
+<text text-anchor="start" x="836.5" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="882" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node31 -->
-<g id="edge78" class="edge">
+<g id="edge77" class="edge">
 <title>Node22&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1424.2937,-598.6545C1428.2922,-569.0747 1432.4158,-512.2676 1414,-469 1395.779,-426.19 1357.1414,-386.5342 1334.5152,-365.7254"/>
-<polygon fill="#191970" stroke="#191970" points="1420.7792,-598.4916 1422.761,-608.8994 1427.7021,-599.5274 1420.7792,-598.4916"/>
+<path fill="none" stroke="#191970" d="M1100.9724,-616.7561C959.4006,-612.7494 669.4575,-600.5063 639,-567 608.9373,-533.9279 716.2757,-425.9669 754,-402 778.3629,-386.5218 808.1503,-374.2045 832.8576,-365.5017"/>
+<polygon fill="#191970" stroke="#191970" points="1101.223,-620.2642 1111.3158,-617.0416 1101.4162,-613.2669 1101.223,-620.2642"/>
 </g>
 <!-- Node22&#45;&gt;Node45 -->
-<g id="edge56" class="edge">
+<g id="edge54" class="edge">
 <title>Node22&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M1478.6165,-606.7204C1520.2036,-597.1243 1571.6319,-582.643 1587,-567 1640.5233,-512.5193 1604.4555,-471.9995 1635,-402 1652.1691,-362.6532 1680.0407,-320.395 1695.2539,-298.5337"/>
-<polygon fill="#191970" stroke="#191970" points="1477.8106,-603.3141 1468.8279,-608.9322 1479.3534,-610.142 1477.8106,-603.3141"/>
+<path fill="none" stroke="#191970" d="M1101.0661,-618.1721C886.0053,-616.7305 289.2279,-608.9525 212,-567 172.1606,-545.358 152,-529.8382 152,-484.5 152,-484.5 152,-484.5 152,-417.5 152,-377.8416 155.0865,-359.8424 186,-335 218.61,-308.7943 327.2803,-295.132 402.6884,-288.6319"/>
+<polygon fill="#191970" stroke="#191970" points="1101.299,-621.6736 1111.3213,-618.238 1101.344,-614.6737 1101.299,-621.6736"/>
 </g>
 <!-- Node22&#45;&gt;Node48 -->
-<g id="edge61" class="edge">
+<g id="edge59" class="edge">
 <title>Node22&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M1469.7139,-605.5918C1491.1744,-597.6557 1515.0774,-585.3835 1531,-567 1592.1207,-496.4329 1565.1991,-455.4269 1592,-366 1605.04,-322.4894 1588.8701,-300.1073 1621,-268 1672.031,-217.0049 1873.3978,-178.6247 1985.5491,-160.6972"/>
-<polygon fill="#191970" stroke="#191970" points="1468.3637,-602.3555 1460.0629,-608.9394 1470.6577,-608.969 1468.3637,-602.3555"/>
+<path fill="none" stroke="#191970" d="M1226.0387,-605.6993C1248.0324,-597.7156 1272.9363,-585.3729 1290,-567 1311.6291,-543.7114 1309.3547,-531.2778 1315,-500 1317.4472,-486.4413 1318.1655,-482.4092 1315,-469 1310.8901,-451.59 1303.3528,-449.7225 1297,-433 1275.1,-375.3523 1269.0694,-360.1371 1261,-299 1259.1971,-285.3407 1252.4192,-278.7795 1261,-268 1315.0326,-200.1227 1570.3068,-167.8618 1699.9904,-155.6388"/>
+<polygon fill="#191970" stroke="#191970" points="1224.8291,-602.4139 1216.5007,-608.9628 1227.0953,-609.037 1224.8291,-602.4139"/>
 </g>
 <!-- Node22&#45;&gt;Node49 -->
-<g id="edge57" class="edge">
+<g id="edge55" class="edge">
 <title>Node22&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M1497.653,-615.7553C1675.6635,-609.0185 2106.5871,-590.4912 2168,-567 2179.3137,-562.6724 2334.6824,-443.8836 2340,-433 2346.0483,-420.6208 2342.9232,-415.4641 2340,-402 2335.4431,-381.0111 2307.9049,-325.9028 2294.0084,-298.8826"/>
-<polygon fill="#191970" stroke="#191970" points="1497.4378,-612.2608 1487.5765,-616.1342 1497.701,-619.2559 1497.4378,-612.2608"/>
+<path fill="none" stroke="#191970" d="M1254.5987,-614.5139C1397.069,-606.6459 1692.1971,-588.0461 1734,-567 1779.6156,-544.0344 1813.1859,-516.3297 1794,-469 1778.4139,-430.5506 1758.6986,-430.9703 1729,-402 1712.1142,-385.5284 1699.461,-387.6087 1690,-366 1684.4741,-353.3789 1681.0835,-345.5035 1690,-335 1705.1915,-317.1046 1838.987,-300.2022 1926.6637,-290.9081"/>
+<polygon fill="#191970" stroke="#191970" points="1254.4036,-611.0192 1244.6103,-615.0617 1254.787,-618.0087 1254.4036,-611.0192"/>
 </g>
 <!-- Node22&#45;&gt;Node51 -->
-<g id="edge58" class="edge">
+<g id="edge56" class="edge">
 <title>Node22&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1344.0524,-617.2971C1074.6385,-612.842 187.2494,-596.0223 134,-567 94.6446,-545.5503 76,-529.3212 76,-484.5 76,-484.5 76,-484.5 76,-417.5 76,-374.4618 76,-323.6482 76,-298.7729"/>
-<polygon fill="#191970" stroke="#191970" points="1344.0794,-620.7979 1354.1356,-617.4628 1344.1945,-613.7988 1344.0794,-620.7979"/>
+<path fill="none" stroke="#191970" d="M1101.1829,-617.607C879.7245,-614.6126 250.3488,-603 165,-567 115.304,-546.0383 76,-538.4359 76,-484.5 76,-484.5 76,-484.5 76,-417.5 76,-373.5323 90.6225,-323.1326 98.7334,-298.5584"/>
+<polygon fill="#191970" stroke="#191970" points="1101.3707,-621.1097 1111.4164,-617.7431 1101.4639,-614.1103 1101.3707,-621.1097"/>
 </g>
 <!-- Node22&#45;&gt;Node52 -->
-<g id="edge59" class="edge">
+<g id="edge57" class="edge">
 <title>Node22&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M1488.0496,-607.0161C1535.1361,-597.7369 1592.8245,-583.5303 1611,-567 1668.9554,-514.2908 1622.5655,-461.9454 1673,-402 1716.0863,-350.7885 1787.2795,-316.5593 1832.8851,-298.571"/>
-<polygon fill="#191970" stroke="#191970" points="1487.3365,-603.589 1478.1808,-608.9204 1488.6629,-610.4622 1487.3365,-603.589"/>
+<path fill="none" stroke="#191970" d="M1205.9263,-604.135C1239.0682,-585.3194 1292.6485,-548.886 1315,-500 1320.729,-487.4698 1320.6824,-481.5514 1315,-469 1304.9987,-446.9088 1284.0013,-455.0912 1274,-433 1268.3176,-420.4486 1270.2853,-415.2676 1274,-402 1285.4879,-360.9692 1315.5093,-320.0282 1332.9441,-298.6674"/>
+<polygon fill="#191970" stroke="#191970" points="1204.2279,-601.0747 1197.1635,-608.9705 1207.61,-607.2034 1204.2279,-601.0747"/>
 </g>
 <!-- Node22&#45;&gt;Node53 -->
-<g id="edge60" class="edge">
+<g id="edge58" class="edge">
 <title>Node22&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M1344.5044,-614.5968C1213.2955,-607.3173 956.0862,-590.2727 922,-567 856.3542,-522.1797 822.5815,-475.4366 853,-402 869.1156,-363.0935 883.4273,-355.8776 920,-335 957.8333,-313.4028 1004.8053,-300.7055 1043.7193,-293.325"/>
-<polygon fill="#191970" stroke="#191970" points="1344.3195,-618.0918 1354.4965,-615.1458 1344.7036,-611.1024 1344.3195,-618.0918"/>
+<path fill="none" stroke="#191970" d="M1255.0318,-615.9184C1378.0572,-610.9341 1612.8816,-597.6457 1690,-567 1735.3541,-548.977 1755.4529,-543.7898 1777,-500 1803.7822,-445.571 1737.0417,-427.4993 1682,-402 1624.0959,-375.1746 1582.0604,-416.4656 1543,-366 1516.8045,-332.1557 1567.5399,-310.2827 1616.9473,-297.6219"/>
+<polygon fill="#191970" stroke="#191970" points="1254.7806,-612.4255 1244.9273,-616.3193 1255.0582,-619.42 1254.7806,-612.4255"/>
 </g>
 <!-- Node54 -->
 <g id="node20" class="node">
 <title>Node54</title>
 <g id="a_node20"><a xlink:href="interpreter_8h.html" target="_top" xlink:title="An interpreter for Relay. ">
-<polygon fill="#ffffff" stroke="#000000" points="293.5,-274 293.5,-293 458.5,-293 458.5,-274 293.5,-274"/>
-<text text-anchor="middle" x="376" y="-281" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/interpreter.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="915.5,-274 915.5,-293 1080.5,-293 1080.5,-274 915.5,-274"/>
+<text text-anchor="middle" x="998" y="-281" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/interpreter.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node54 -->
-<g id="edge75" class="edge">
+<g id="edge74" class="edge">
 <title>Node22&#45;&gt;Node54</title>
-<path fill="none" stroke="#191970" d="M1344.3984,-616.7318C1085.2701,-610.5587 254.6956,-589.1358 202,-567 152.573,-546.2372 114,-538.1109 114,-484.5 114,-484.5 114,-484.5 114,-417.5 114,-335.6282 216.6638,-303.7486 293.4612,-291.3545"/>
-<polygon fill="#191970" stroke="#191970" points="1344.3945,-620.2326 1354.4748,-616.971 1344.5607,-613.2345 1344.3945,-620.2326"/>
+<path fill="none" stroke="#191970" d="M1100.9284,-610.0301C1058.0691,-602.9471 1004.8899,-590.1037 962,-567 944.2838,-557.4567 947.0277,-544.941 929,-536 842.5956,-493.1468 777.2855,-573.6386 715,-500 706.1024,-489.4805 707.518,-480.5692 715,-469 745.3018,-422.1451 901.5732,-402.2428 944,-366 968.5999,-344.9857 986.3097,-310.0329 993.8947,-293.1768"/>
+<polygon fill="#191970" stroke="#191970" points="1100.6031,-613.522 1111.0267,-611.6248 1101.6951,-606.6077 1100.6031,-613.522"/>
 </g>
 <!-- Node57 -->
 <g id="node21" class="node">
 <title>Node57</title>
 <g id="a_node21"><a xlink:href="codegen_8h.html" target="_top" xlink:title="Translates IRModule to runtime::Module. ">
-<polygon fill="#ffffff" stroke="#000000" points="2008.5,-268.5 2008.5,-298.5 2115.5,-298.5 2115.5,-268.5 2008.5,-268.5"/>
-<text text-anchor="start" x="2016.5" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="2062" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/codegen.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="1440.5,-268.5 1440.5,-298.5 1547.5,-298.5 1547.5,-268.5 1440.5,-268.5"/>
+<text text-anchor="start" x="1448.5" y="-286.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="1494" y="-275.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/codegen.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node57 -->
-<g id="edge77" class="edge">
+<g id="edge76" class="edge">
 <title>Node22&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M1498.0962,-611.5374C1541.1105,-604.9302 1594.2649,-592.0604 1636,-567 1719.9486,-516.5919 1702.4981,-459.1646 1782,-402 1852.9278,-351.0005 1949.172,-316.4156 2008.3555,-298.3366"/>
-<polygon fill="#191970" stroke="#191970" points="1497.3512,-608.1088 1487.9576,-613.0088 1498.3566,-615.0362 1497.3512,-608.1088"/>
+<path fill="none" stroke="#191970" d="M1254.8861,-608.0431C1286.3384,-600.6922 1321.4856,-588.2016 1348,-567 1384.4852,-537.8255 1411.2723,-508.2891 1386,-469 1365.1262,-436.5489 1327.8738,-465.4511 1307,-433 1299.5464,-421.4124 1300.3585,-414.0714 1307,-402 1336.4584,-348.4574 1401.4606,-315.7125 1446.4907,-298.5851"/>
+<polygon fill="#191970" stroke="#191970" points="1253.8363,-604.6901 1244.8197,-610.2537 1255.3377,-611.5272 1253.8363,-604.6901"/>
 </g>
 <!-- Node95 -->
 <g id="node22" class="node">
 <title>Node95</title>
 <g id="a_node22"><a xlink:href="tir_2analysis_8h.html" target="_top" xlink:title="Analysis utilities and passes for TIR. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1791,-408 1791,-427 1931,-427 1931,-408 1791,-408"/>
-<text text-anchor="middle" x="1861" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/analysis.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1316,-408 1316,-427 1456,-427 1456,-408 1316,-408"/>
+<text text-anchor="middle" x="1386" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/analysis.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node95 -->
-<g id="edge79" class="edge">
+<g id="edge78" class="edge">
 <title>Node22&#45;&gt;Node95</title>
-<path fill="none" stroke="#191970" d="M1497.9104,-612.3396C1578.5656,-604.7816 1699.0511,-590.0565 1739,-567 1799.6406,-532.0012 1842.5818,-454.5043 1856.3053,-427.2281"/>
-<polygon fill="#191970" stroke="#191970" points="1497.4271,-608.8692 1487.7895,-613.2701 1498.068,-615.8398 1497.4271,-608.8692"/>
+<path fill="none" stroke="#191970" d="M1254.8272,-612.8187C1318.8672,-606.2961 1402.9123,-592.9596 1424,-567 1451.4623,-533.1931 1437.612,-510.3739 1424,-469 1418.5134,-452.3233 1405.3243,-436.6145 1396.0305,-427.0313"/>
+<polygon fill="#191970" stroke="#191970" points="1254.3446,-609.3492 1244.7337,-613.8079 1255.0274,-616.3158 1254.3446,-609.3492"/>
 </g>
 <!-- Node111 -->
 <g id="node23" class="node">
 <title>Node111</title>
 <g id="a_node23"><a xlink:href="type__relation_8h.html" target="_top" xlink:title="Type relation and function for type inference(checking). ">
-<polygon fill="#ffffff" stroke="#ff0000" points="937.5,-469.5 937.5,-499.5 1046.5,-499.5 1046.5,-469.5 937.5,-469.5"/>
-<text text-anchor="start" x="945.5" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type</text>
-<text text-anchor="middle" x="992" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_relation.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="482.5,-469.5 482.5,-499.5 591.5,-499.5 591.5,-469.5 482.5,-469.5"/>
+<text text-anchor="start" x="490.5" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/type</text>
+<text text-anchor="middle" x="537" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_relation.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node111 -->
-<g id="edge38" class="edge">
+<g id="edge36" class="edge">
 <title>Node22&#45;&gt;Node111</title>
-<path fill="none" stroke="#191970" d="M1344.0761,-615.2868C1220.9081,-609.3438 990.6925,-594.645 966,-567 948.9933,-547.9598 966.0391,-517.8266 979.411,-499.7296"/>
-<polygon fill="#191970" stroke="#191970" points="1344.0483,-618.7893 1354.203,-615.7673 1344.3802,-611.7972 1344.0483,-618.7893"/>
+<path fill="none" stroke="#191970" d="M1101.0708,-614.8423C955.1468,-607.4284 648.8411,-589.4144 606,-567 577.0472,-551.852 555.4346,-518.8618 544.6564,-499.5018"/>
+<polygon fill="#191970" stroke="#191970" points="1101.1308,-618.3496 1111.2943,-615.3577 1101.4833,-611.3585 1101.1308,-618.3496"/>
 </g>
 <!-- Node144 -->
 <g id="node24" class="node">
 <title>Node144</title>
-<g id="a_node24"><a xlink:href="error_8h.html" target="_top" xlink:title="Utilities for error tracking and reporting. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1273.5,-542 1273.5,-561 1392.5,-561 1392.5,-542 1273.5,-542"/>
-<text text-anchor="middle" x="1333" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/error.h</text>
+<g id="a_node24"><a xlink:href="global__var__supply_8h.html" target="_top" xlink:title="GlobalVarSupply that can be used to generate unique. ">
+<polygon fill="#ffffff" stroke="#000000" points="218.5,-335.5 218.5,-365.5 335.5,-365.5 335.5,-335.5 218.5,-335.5"/>
+<text text-anchor="start" x="226.5" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/global</text>
+<text text-anchor="middle" x="277" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_var_supply.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node144 -->
 <g id="edge33" class="edge">
 <title>Node22&#45;&gt;Node144</title>
-<path fill="none" stroke="#191970" d="M1400.1899,-602.6559C1383.225,-589.7395 1359.8204,-571.9201 1345.5849,-561.0817"/>
-<polygon fill="#191970" stroke="#191970" points="1398.3151,-605.6275 1408.3917,-608.9005 1402.5555,-600.058 1398.3151,-605.6275"/>
+<path fill="none" stroke="#191970" d="M1101.1314,-616.1602C897.3287,-609.6508 356.5921,-590.2681 326,-567 262.3528,-518.5905 269.6914,-406.4198 274.7564,-365.6552"/>
+<polygon fill="#191970" stroke="#191970" points="1101.0548,-619.6594 1111.1609,-616.4788 1101.2771,-612.663 1101.0548,-619.6594"/>
 </g>
 <!-- Node145 -->
 <g id="node25" class="node">
 <title>Node145</title>
-<g id="a_node25"><a xlink:href="global__var__supply_8h.html" target="_top" xlink:title="GlobalVarSupply that can be used to generate unique. ">
-<polygon fill="#ffffff" stroke="#000000" points="1465.5,-335.5 1465.5,-365.5 1582.5,-365.5 1582.5,-335.5 1465.5,-335.5"/>
-<text text-anchor="start" x="1473.5" y="-353.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/ir/global</text>
-<text text-anchor="middle" x="1524" y="-342.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">_var_supply.h</text>
+<g id="a_node25"><a xlink:href="arg__info_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/arg_info.h">
+<polygon fill="#ffffff" stroke="#000000" points="2069,-402.5 2069,-432.5 2221,-432.5 2221,-402.5 2069,-402.5"/>
+<text text-anchor="start" x="2077" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
+<text text-anchor="middle" x="2145" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node145 -->
-<g id="edge35" class="edge">
+<g id="edge37" class="edge">
 <title>Node22&#45;&gt;Node145</title>
-<path fill="none" stroke="#191970" d="M1462.516,-605.0192C1480.5385,-596.9404 1500.0047,-584.7138 1511,-567 1551.5107,-501.7355 1535.7126,-403.0067 1527.6232,-365.5909"/>
-<polygon fill="#191970" stroke="#191970" points="1461.1578,-601.7934 1453.2755,-608.8729 1463.8523,-608.254 1461.1578,-601.7934"/>
+<path fill="none" stroke="#191970" d="M1254.7551,-615.4862C1399.3779,-609.273 1706.957,-593.3691 1810,-567 1931.5493,-535.8951 2064.4064,-464.1319 2119.189,-432.7057"/>
+<polygon fill="#191970" stroke="#191970" points="1254.4712,-611.995 1244.6289,-615.9168 1254.7687,-618.9887 1254.4712,-611.995"/>
 </g>
-<!-- Node146 -->
-<g id="node26" class="node">
-<title>Node146</title>
-<g id="a_node26"><a xlink:href="arg__info_8h.html" target="_top" xlink:title="include/tvm/meta_schedule\l/arg_info.h">
-<polygon fill="#ffffff" stroke="#000000" points="2425,-402.5 2425,-432.5 2577,-432.5 2577,-402.5 2425,-402.5"/>
-<text text-anchor="start" x="2433" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/meta_schedule</text>
-<text text-anchor="middle" x="2501" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/arg_info.h</text>
+<!-- Node82 -->
+<g id="node30" class="node">
+<title>Node82</title>
+<g id="a_node30"><a xlink:href="relay_2analysis_8h.html" target="_top" xlink:title="The set of Relay analysis passes written in C++. ">
+<polygon fill="#ffffff" stroke="#000000" points="1000.5,-408 1000.5,-427 1155.5,-427 1155.5,-408 1000.5,-408"/>
+<text text-anchor="middle" x="1078" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/analysis.h</text>
 </a>
 </g>
 </g>
-<!-- Node22&#45;&gt;Node146 -->
-<g id="edge39" class="edge">
-<title>Node22&#45;&gt;Node146</title>
-<path fill="none" stroke="#191970" d="M1497.9479,-616.7458C1688.6561,-611.9611 2172.5822,-596.9905 2239,-567 2295.139,-541.6508 2284.3216,-500.9243 2337,-469 2364.4363,-452.373 2397.7493,-440.6288 2427.1654,-432.5986"/>
-<polygon fill="#191970" stroke="#191970" points="1497.6337,-613.2524 1487.7237,-616.9996 1497.8075,-620.2503 1497.6337,-613.2524"/>
+<!-- Node22&#45;&gt;Node82 -->
+<g id="edge61" class="edge">
+<title>Node22&#45;&gt;Node82</title>
+<path fill="none" stroke="#191970" d="M1182.2263,-598.573C1184.9777,-581.4981 1186.9115,-556.4908 1180,-536 1163.4297,-486.8731 1114.8065,-445.089 1091.1833,-427.0377"/>
+<polygon fill="#191970" stroke="#191970" points="1178.7636,-598.0508 1180.385,-608.5209 1185.6467,-599.3249 1178.7636,-598.0508"/>
 </g>
-<!-- Node82 -->
+<!-- Node150 -->
 <g id="node31" class="node">
-<title>Node82</title>
-<g id="a_node31"><a xlink:href="relay_2analysis_8h.html" target="_top" xlink:title="The set of Relay analysis passes written in C++. ">
-<polygon fill="#ffffff" stroke="#000000" points="179.5,-408 179.5,-427 334.5,-427 334.5,-408 179.5,-408"/>
-<text text-anchor="middle" x="257" y="-415" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/analysis.h</text>
+<title>Node150</title>
+<g id="a_node31"><a xlink:href="error_8h.html" target="_top" xlink:title="include/tvm/relay/error.h">
+<polygon fill="#ffffff" stroke="#ff0000" points="648.5,-542 648.5,-561 785.5,-561 785.5,-542 648.5,-542"/>
+<text text-anchor="middle" x="717" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/error.h</text>
 </a>
 </g>
 </g>
-<!-- Node22&#45;&gt;Node82 -->
-<g id="edge63" class="edge">
-<title>Node22&#45;&gt;Node82</title>
-<path fill="none" stroke="#191970" d="M1344.156,-616.3392C1113.0873,-609.6334 434.4845,-588.344 337,-567 259.2805,-549.9835 215.5014,-565.9508 171,-500 163.2935,-488.5791 164.2066,-480.9865 171,-469 182.7558,-448.2577 206.559,-434.9669 226.1483,-427.0927"/>
-<polygon fill="#191970" stroke="#191970" points="1344.3923,-619.8474 1354.4893,-616.638 1344.5947,-612.8503 1344.3923,-619.8474"/>
+<!-- Node22&#45;&gt;Node150 -->
+<g id="edge62" class="edge">
+<title>Node22&#45;&gt;Node150</title>
+<path fill="none" stroke="#191970" d="M1101.2333,-609.6089C1023.7563,-600.2997 900.7736,-584.6033 795,-567 784.5133,-565.2548 773.2783,-563.1571 762.6941,-561.0769"/>
+<polygon fill="#191970" stroke="#191970" points="1101.0431,-613.1111 1111.3881,-610.8238 1101.8747,-606.1606 1101.0431,-613.1111"/>
 </g>
 <!-- Node80 -->
 <g id="node32" class="node">
 <title>Node80</title>
 <g id="a_node32"><a xlink:href="relay_2expr_8h.html" target="_top" xlink:title="Relay expression language. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="346.5,-542 346.5,-561 481.5,-561 481.5,-542 346.5,-542"/>
-<text text-anchor="middle" x="414" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/expr.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1014.5,-542 1014.5,-561 1149.5,-561 1149.5,-542 1014.5,-542"/>
+<text text-anchor="middle" x="1082" y="-549" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/expr.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node80 -->
-<g id="edge64" class="edge">
+<g id="edge63" class="edge">
 <title>Node22&#45;&gt;Node80</title>
-<path fill="none" stroke="#191970" d="M1343.9369,-613.3727C1153.3435,-600.6917 662.265,-568.0181 481.8196,-556.0123"/>
-<polygon fill="#191970" stroke="#191970" points="1343.9377,-616.8804 1354.1481,-614.0521 1344.4025,-609.8958 1343.9377,-616.8804"/>
+<path fill="none" stroke="#191970" d="M1155.8056,-603.0102C1137.2602,-590.067 1111.4011,-572.0195 1095.7289,-561.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1154.0421,-606.0475 1164.2455,-608.9005 1158.0483,-600.3072 1154.0421,-606.0475"/>
 </g>
 <!-- Node90 -->
 <g id="node34" class="node">
 <title>Node90</title>
 <g id="a_node34"><a xlink:href="relay_2feature_8h.html" target="_top" xlink:title="Detect features used in Expr/Module. ">
-<polygon fill="#ffffff" stroke="#000000" points="614.5,-475 614.5,-494 763.5,-494 763.5,-475 614.5,-475"/>
-<text text-anchor="middle" x="689" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/feature.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="971.5,-475 971.5,-494 1120.5,-494 1120.5,-475 971.5,-475"/>
+<text text-anchor="middle" x="1046" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/feature.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node90 -->
-<g id="edge74" class="edge">
+<g id="edge73" class="edge">
 <title>Node22&#45;&gt;Node90</title>
-<path fill="none" stroke="#191970" d="M1344.3216,-615.0819C1185.6068,-607.6091 831.6128,-588.7368 781,-567 742.9548,-550.6606 709.9842,-512.067 696.0605,-494.0613"/>
-<polygon fill="#191970" stroke="#191970" points="1344.3355,-618.5863 1354.4882,-615.5575 1344.6627,-611.5939 1344.3355,-618.5863"/>
+<path fill="none" stroke="#191970" d="M1177.9291,-598.8742C1176.8715,-580.6787 1172.662,-553.8076 1158,-536 1140.3156,-514.5215 1111.71,-501.5766 1087.8355,-494.0371"/>
+<polygon fill="#191970" stroke="#191970" points="1174.4324,-599.0389 1178.2455,-608.9238 1181.429,-598.8185 1174.4324,-599.0389"/>
 </g>
 <!-- Node151 -->
 <g id="node36" class="node">
 <title>Node151</title>
 <g id="a_node36"><a xlink:href="ir__docsifier_8h.html" target="_top" xlink:title="include/tvm/script\l/printer/ir_docsifier.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="975,-536.5 975,-566.5 1091,-566.5 1091,-536.5 975,-536.5"/>
-<text text-anchor="start" x="983" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
-<text text-anchor="middle" x="1033" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/printer/ir_docsifier.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="804,-536.5 804,-566.5 920,-566.5 920,-536.5 804,-536.5"/>
+<text text-anchor="start" x="812" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
+<text text-anchor="middle" x="862" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/printer/ir_docsifier.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node151 -->
-<g id="edge76" class="edge">
+<g id="edge75" class="edge">
 <title>Node22&#45;&gt;Node151</title>
-<path fill="none" stroke="#191970" d="M1344.1763,-607.6801C1278.9834,-598.1387 1183.0744,-583.2537 1100,-567 1097.0708,-566.4269 1094.0747,-565.8179 1091.0527,-565.1855"/>
-<polygon fill="#191970" stroke="#191970" points="1343.9689,-611.1868 1354.369,-609.165 1344.978,-604.2599 1343.9689,-611.1868"/>
+<path fill="none" stroke="#191970" d="M1122.713,-606.8971C1073.2495,-596.5005 998.7687,-580.8068 934,-567 929.4981,-566.0403 924.8385,-565.044 920.1546,-564.0404"/>
+<polygon fill="#191970" stroke="#191970" points="1122.0625,-610.3367 1132.5685,-608.9681 1123.502,-603.4864 1122.0625,-610.3367"/>
 </g>
 <!-- Node153 -->
 <g id="node37" class="node">
 <title>Node153</title>
 <g id="a_node37"><a xlink:href="state_8h.html" target="_top" xlink:title="This file defines ScheduleState, the core data structure of TensorIR scheduling. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="1987,-402.5 1987,-432.5 2121,-432.5 2121,-402.5 1987,-402.5"/>
-<text text-anchor="start" x="1995" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/schedule</text>
-<text text-anchor="middle" x="2054" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/state.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1512,-402.5 1512,-432.5 1646,-432.5 1646,-402.5 1512,-402.5"/>
+<text text-anchor="start" x="1520" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/schedule</text>
+<text text-anchor="middle" x="1579" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/state.h</text>
 </a>
 </g>
 </g>
 <!-- Node22&#45;&gt;Node153 -->
-<g id="edge80" class="edge">
+<g id="edge79" class="edge">
 <title>Node22&#45;&gt;Node153</title>
-<path fill="none" stroke="#191970" d="M1497.933,-614.7642C1570.2228,-609.504 1680.6508,-596.9255 1772,-567 1875.2119,-533.1884 1985.6838,-463.5166 2031.7979,-432.7153"/>
-<polygon fill="#191970" stroke="#191970" points="1497.283,-611.301 1487.5508,-615.4884 1497.7702,-618.284 1497.283,-611.301"/>
+<path fill="none" stroke="#191970" d="M1254.6587,-609.2446C1321.671,-600.1599 1413.1067,-585.0616 1445,-567 1504.4451,-533.3355 1551.4132,-463.5856 1570.1641,-432.7396"/>
+<polygon fill="#191970" stroke="#191970" points="1253.9916,-605.8025 1244.543,-610.5957 1254.9184,-612.7409 1253.9916,-605.8025"/>
 </g>
 <!-- Node24&#45;&gt;Node26 -->
 <g id="edge5" class="edge">
 <title>Node24&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1213.006,-537.7424C1242.3495,-524.7224 1285.634,-505.5167 1311.4055,-494.0817"/>
-<polygon fill="#191970" stroke="#191970" points="1211.3557,-534.6455 1203.6346,-541.9005 1214.1948,-541.0439 1211.3557,-534.6455"/>
+<path fill="none" stroke="#191970" d="M459.9534,-535.6559C442.7957,-522.7395 419.1252,-504.9201 404.7279,-494.0817"/>
+<polygon fill="#191970" stroke="#191970" points="458.1542,-538.6824 468.2485,-541.9005 462.3643,-533.0899 458.1542,-538.6824"/>
 </g>
 <!-- Node24&#45;&gt;Node111 -->
 <g id="edge32" class="edge">
 <title>Node24&#45;&gt;Node111</title>
-<path fill="none" stroke="#191970" d="M1145.0224,-538.4605C1113.3658,-527.2974 1067.7776,-511.2216 1034.599,-499.5218"/>
-<polygon fill="#191970" stroke="#191970" points="1144.1828,-541.8757 1154.7776,-541.9005 1146.5108,-535.2741 1144.1828,-541.8757"/>
+<path fill="none" stroke="#191970" d="M495.5915,-534.0423C504.5451,-523.33 515.8932,-509.7528 524.4445,-499.5218"/>
+<polygon fill="#191970" stroke="#191970" points="492.7511,-531.9831 489.0234,-541.9005 498.1221,-536.4723 492.7511,-531.9831"/>
 </g>
 <!-- Node26&#45;&gt;Node23 -->
 <g id="edge6" class="edge">
 <title>Node26&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1342.9452,-465.646C1358.5618,-436.6208 1390.429,-379.6703 1423,-335 1432.3737,-322.1442 1444.2637,-308.6365 1453.4675,-298.6817"/>
-<polygon fill="#191970" stroke="#191970" points="1339.7435,-464.2116 1338.1246,-474.6821 1345.9196,-467.5065 1339.7435,-464.2116"/>
+<path fill="none" stroke="#191970" d="M403.0804,-466.023C422.2943,-435.2976 464.5162,-373.299 514,-335 534.1402,-319.412 559.7573,-307.1879 581.4075,-298.5685"/>
+<polygon fill="#191970" stroke="#191970" points="399.9222,-464.4775 397.6706,-474.8303 405.8869,-468.1413 399.9222,-464.4775"/>
 </g>
 <!-- Node26&#45;&gt;Node27 -->
 <g id="edge7" class="edge">
 <title>Node26&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M1297.9088,-470.9646C1276.8373,-461.9314 1250.1129,-448.8211 1229,-433 1202.6629,-413.2641 1177.9777,-383.6318 1164.2413,-365.7704"/>
-<polygon fill="#191970" stroke="#191970" points="1296.5755,-474.2007 1307.152,-474.8233 1299.2722,-467.741 1296.5755,-474.2007"/>
+<path fill="none" stroke="#191970" d="M394.3652,-464.6918C397.5892,-437.6902 403.2937,-389.915 406.1957,-365.611"/>
+<polygon fill="#191970" stroke="#191970" points="390.8611,-464.5186 393.1507,-474.8631 397.8117,-465.3486 390.8611,-464.5186"/>
 </g>
 <!-- Node28 -->
 <g id="node8" class="node">
 <title>Node28</title>
 <g id="a_node8"><a xlink:href="relay_2transform_8h.html" target="_top" xlink:title="Relay specific transformation passes. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="648,-274 648,-293 810,-293 810,-274 648,-274"/>
-<text text-anchor="middle" x="729" y="-281" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/transform.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="697,-274 697,-293 859,-293 859,-274 697,-274"/>
+<text text-anchor="middle" x="778" y="-281" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/transform.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node28 -->
 <g id="edge8" class="edge">
 <title>Node26&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M1283.4131,-472.4685C1205.2158,-453.0063 1048.8886,-412.1118 920,-366 856.0616,-343.1251 783.0252,-309.3549 748.7358,-293.0285"/>
-<polygon fill="#191970" stroke="#191970" points="1282.9154,-475.951 1293.4637,-474.9589 1284.5991,-469.1565 1282.9154,-475.951"/>
+<path fill="none" stroke="#191970" d="M410.8803,-468.0689C422.3033,-458.0455 437.105,-444.911 450,-433 473.0901,-411.6717 524.3356,-349.9242 552,-335 567.5613,-326.6051 668.2733,-305.4641 729.8858,-293.0346"/>
+<polygon fill="#191970" stroke="#191970" points="408.4581,-465.5375 403.2354,-474.7556 413.0667,-470.8064 408.4581,-465.5375"/>
 </g>
 <!-- Node30 -->
 <g id="node9" class="node">
 <title>Node30</title>
 <g id="a_node9"><a xlink:href="target__kind_8h.html" target="_top" xlink:title="Target kind registry. ">
-<polygon fill="#ffffff" stroke="#000000" points="1238.5,-402.5 1238.5,-432.5 1345.5,-432.5 1345.5,-402.5 1238.5,-402.5"/>
-<text text-anchor="start" x="1246.5" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
-<text text-anchor="middle" x="1292" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target_kind.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="594.5,-402.5 594.5,-432.5 701.5,-432.5 701.5,-402.5 594.5,-402.5"/>
+<text text-anchor="start" x="602.5" y="-420.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/target</text>
+<text text-anchor="middle" x="648" y="-409.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/target_kind.h</text>
 </a>
 </g>
 </g>
 <!-- Node26&#45;&gt;Node30 -->
 <g id="edge9" class="edge">
 <title>Node26&#45;&gt;Node30</title>
-<path fill="none" stroke="#191970" d="M1321.7283,-466.0804C1315.2699,-455.5264 1307.2672,-442.4489 1301.1924,-432.5218"/>
-<polygon fill="#191970" stroke="#191970" points="1318.9206,-468.1977 1327.1257,-474.9005 1324.8914,-464.544 1318.9206,-468.1977"/>
+<path fill="none" stroke="#191970" d="M438.245,-472.3968C482.1494,-460.9062 548.1223,-443.6399 594.3255,-431.5476"/>
+<polygon fill="#191970" stroke="#191970" points="437.1947,-469.0537 428.4067,-474.9717 438.967,-475.8257 437.1947,-469.0537"/>
 </g>
 <!-- Node26&#45;&gt;Node95 -->
 <g id="edge31" class="edge">
 <title>Node26&#45;&gt;Node95</title>
-<path fill="none" stroke="#191970" d="M1415.0925,-474.083C1518.2425,-460.9938 1693.503,-438.7544 1790.7437,-426.4151"/>
-<polygon fill="#191970" stroke="#191970" points="1414.5131,-470.6283 1405.0333,-475.3594 1415.3944,-477.5726 1414.5131,-470.6283"/>
+<path fill="none" stroke="#191970" d="M445.9714,-473.1077C454.9651,-471.5159 464.2219,-470.0635 473,-469 778.2953,-432.0137 857.0421,-451.7104 1164,-433 1214.7116,-429.9089 1271.9016,-425.9036 1315.5337,-422.7389"/>
+<polygon fill="#191970" stroke="#191970" points="445.1688,-469.6966 435.9686,-474.9508 446.4373,-476.5807 445.1688,-469.6966"/>
 </g>
 <!-- Node30&#45;&gt;Node31 -->
 <g id="edge10" class="edge">
 <title>Node30&#45;&gt;Node31</title>
-<path fill="none" stroke="#191970" d="M1301.1783,-392.9021C1304.586,-383.7696 1308.3487,-373.6854 1311.3496,-365.6432"/>
-<polygon fill="#191970" stroke="#191970" points="1297.8897,-391.704 1297.6729,-402.2967 1304.448,-394.1512 1297.8897,-391.704"/>
+<path fill="none" stroke="#191970" d="M710.2222,-399.6842C747.339,-389.0568 793.9548,-375.7095 829.2568,-365.6017"/>
+<polygon fill="#191970" stroke="#191970" points="709.1644,-396.3464 700.5142,-402.4639 711.0913,-403.076 709.1644,-396.3464"/>
 </g>
 <!-- Node31&#45;&gt;Node23 -->
 <g id="edge11" class="edge">
 <title>Node31&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1360.2418,-331.3132C1383.6503,-320.9267 1412.1539,-308.2794 1433.9649,-298.6017"/>
-<polygon fill="#191970" stroke="#191970" points="1358.6084,-328.2089 1350.8873,-335.4639 1361.4475,-334.6073 1358.6084,-328.2089"/>
+<path fill="none" stroke="#191970" d="M818.5646,-333.8978C775.6806,-322.6742 719.6978,-308.0225 679.1563,-297.412"/>
+<polygon fill="#191970" stroke="#191970" points="817.7737,-337.3086 828.334,-336.4546 819.546,-330.5367 817.7737,-337.3086"/>
 </g>
 <!-- Node31&#45;&gt;Node28 -->
 <g id="edge29" class="edge">
 <title>Node31&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M1253.1426,-340.2363C1240.8319,-338.3889 1228.0265,-336.5578 1216,-335 1073.5066,-316.5421 906.9114,-300.0104 810.1358,-290.9057"/>
-<polygon fill="#191970" stroke="#191970" points="1252.927,-343.7436 1263.3394,-341.7862 1253.979,-336.8231 1252.927,-343.7436"/>
+<path fill="none" stroke="#191970" d="M850.2481,-330.0445C831.1799,-317.7601 807.8068,-302.7025 792.9076,-293.1039"/>
+<polygon fill="#191970" stroke="#191970" points="848.3583,-332.9904 858.6604,-335.4639 852.1494,-327.1058 848.3583,-332.9904"/>
 </g>
 <!-- Node31&#45;&gt;Node45 -->
 <g id="edge12" class="edge">
 <title>Node31&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M1380.8623,-339.5006C1449.0904,-327.7492 1557.2032,-309.1282 1629.8225,-296.6205"/>
-<polygon fill="#191970" stroke="#191970" points="1379.9412,-336.1076 1370.6804,-341.2543 1381.1294,-343.006 1379.9412,-336.1076"/>
+<path fill="none" stroke="#191970" d="M818.5186,-340.5984C754.2869,-330.4906 652.156,-314.1746 564,-299 561.1913,-298.5165 558.3312,-298.0194 555.443,-297.5134"/>
+<polygon fill="#191970" stroke="#191970" points="818.0719,-344.0711 828.494,-342.1662 819.1587,-337.156 818.0719,-344.0711"/>
 </g>
 <!-- Node31&#45;&gt;Node48 -->
 <g id="edge27" class="edge">
 <title>Node31&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M1336.8639,-327.65C1353.7259,-309.4261 1379.367,-284.3335 1406,-268 1479.4784,-222.9373 1504.0834,-220.691 1588,-201 1726.0054,-168.617 1891.4839,-156.5133 1985.9238,-152.0484"/>
-<polygon fill="#191970" stroke="#191970" points="1333.8856,-325.7241 1329.7622,-335.4836 1339.0718,-330.4256 1333.8856,-325.7241"/>
+<path fill="none" stroke="#191970" d="M882.7156,-325.2351C884.6255,-306.7919 890.255,-282.5632 906,-268 963.9817,-214.3704 1498.2844,-169.9093 1699.676,-154.9206"/>
+<polygon fill="#191970" stroke="#191970" points="879.2177,-325.0784 881.9725,-335.3088 886.1988,-325.5934 879.2177,-325.0784"/>
 </g>
 <!-- Node31&#45;&gt;Node49 -->
 <g id="edge19" class="edge">
 <title>Node31&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M1380.948,-342.5929C1404.5113,-339.8843 1431.4421,-337.0283 1456,-335 1752.7531,-310.4903 1828.0706,-321.2723 2125,-299 2152.7219,-296.9206 2183.0805,-294.1315 2209.8905,-291.4925"/>
-<polygon fill="#191970" stroke="#191970" points="1380.2553,-339.1499 1370.727,-343.7826 1381.0647,-346.1029 1380.2553,-339.1499"/>
+<path fill="none" stroke="#191970" d="M945.713,-347.0872C1092.5173,-339.1692 1465.9325,-318.7168 1778,-299 1827.628,-295.8644 1883.3902,-292.0253 1926.9865,-288.9523"/>
+<polygon fill="#191970" stroke="#191970" points="945.3358,-343.6024 935.5386,-347.6355 945.7125,-350.5923 945.3358,-343.6024"/>
 </g>
 <!-- Node31&#45;&gt;Node51 -->
 <g id="edge23" class="edge">
 <title>Node31&#45;&gt;Node51</title>
-<path fill="none" stroke="#191970" d="M1253.2156,-339.588C1240.8983,-337.7994 1228.0718,-336.1557 1216,-335 803.3555,-295.4962 697.8698,-322.4063 284,-299 240.2543,-296.526 191.3939,-292.9279 152.0268,-289.8211"/>
-<polygon fill="#191970" stroke="#191970" points="1253.0063,-343.0957 1263.4147,-341.1171 1254.0442,-336.1731 1253.0063,-343.0957"/>
+<path fill="none" stroke="#191970" d="M817.9046,-344.9802C675.4459,-332.7119 333.0585,-303.2261 180.077,-290.0516"/>
+<polygon fill="#191970" stroke="#191970" points="817.9545,-348.4974 828.218,-345.8684 818.5552,-341.5232 817.9545,-348.4974"/>
 </g>
 <!-- Node31&#45;&gt;Node52 -->
 <g id="edge24" class="edge">
 <title>Node31&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M1381.0166,-343.3073C1404.5871,-340.6732 1431.5049,-337.6819 1456,-335 1604.8566,-318.7022 1642.7976,-320.4462 1791,-299 1793.8207,-298.5918 1796.6909,-298.1589 1799.5877,-297.707"/>
-<polygon fill="#191970" stroke="#191970" points="1380.3402,-339.861 1370.7912,-344.451 1381.1183,-346.8176 1380.3402,-339.861"/>
+<path fill="none" stroke="#191970" d="M945.6614,-341.3075C1030.2034,-329.0999 1179.3482,-307.564 1269.7849,-294.5052"/>
+<polygon fill="#191970" stroke="#191970" points="945.0771,-337.8555 935.68,-342.7488 946.0776,-344.7836 945.0771,-337.8555"/>
 </g>
 <!-- Node31&#45;&gt;Node53 -->
 <g id="edge25" class="edge">
 <title>Node31&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M1263.0138,-332.1392C1232.0352,-321.6033 1193.6176,-308.5375 1164.4035,-298.6017"/>
-<polygon fill="#191970" stroke="#191970" points="1262.1949,-335.5575 1272.7894,-335.4639 1264.4489,-328.9303 1262.1949,-335.5575"/>
+<path fill="none" stroke="#191970" d="M945.9785,-346.4176C1065.9285,-338.5864 1332.7529,-320.3259 1557,-299 1576.3989,-297.1552 1597.324,-294.9019 1616.8073,-292.6911"/>
+<polygon fill="#191970" stroke="#191970" points="945.6209,-342.9334 935.8694,-347.0756 946.0757,-349.9186 945.6209,-342.9334"/>
 </g>
 <!-- Node31&#45;&gt;Node54 -->
 <g id="edge28" class="edge">
 <title>Node31&#45;&gt;Node54</title>
-<path fill="none" stroke="#191970" d="M1253.1933,-339.808C1240.878,-337.9994 1228.0579,-336.2922 1216,-335 1178.7959,-331.0131 663.1463,-300.4234 458.6725,-288.3645"/>
-<polygon fill="#191970" stroke="#191970" points="1252.9819,-343.3156 1263.3917,-341.3442 1254.0246,-336.3937 1252.9819,-343.3156"/>
+<path fill="none" stroke="#191970" d="M916.8419,-330.3758C938.2134,-318.0319 964.6042,-302.789 981.3723,-293.1039"/>
+<polygon fill="#191970" stroke="#191970" points="914.9415,-327.4315 908.0327,-335.4639 918.4426,-333.4931 914.9415,-327.4315"/>
 </g>
 <!-- Node31&#45;&gt;Node57 -->
 <g id="edge30" class="edge">
 <title>Node31&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M1380.9683,-342.8265C1404.5338,-340.1422 1431.4607,-337.242 1456,-335 1680.0808,-314.5275 1737.3888,-324.0868 1961,-299 1976.4257,-297.2694 1993.1059,-294.911 2008.3808,-292.5562"/>
-<polygon fill="#191970" stroke="#191970" points="1380.2811,-339.3823 1370.7461,-344.0011 1381.0803,-346.3365 1380.2811,-339.3823"/>
+<path fill="none" stroke="#191970" d="M945.9387,-347.5711C1048.7489,-342.1748 1256.9238,-328.4133 1431,-299 1434.1097,-298.4746 1437.2929,-297.8786 1440.4962,-297.2339"/>
+<polygon fill="#191970" stroke="#191970" points="945.5311,-344.0874 935.7246,-348.0978 945.8916,-351.0781 945.5311,-344.0874"/>
 </g>
 <!-- Node45&#45;&gt;Node46 -->
 <g id="edge13" class="edge">
 <title>Node45&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M1790.8876,-266.4546C1983.2542,-227.8276 2449.797,-134.1462 2632.1725,-97.5253"/>
-<polygon fill="#191970" stroke="#191970" points="1789.9435,-263.0743 1780.8283,-268.4745 1791.3217,-269.9373 1789.9435,-263.0743"/>
+<path fill="none" stroke="#191970" d="M524.1301,-264.4718C572.4121,-245.0153 651.6935,-215.634 723,-201 1000.1896,-144.1134 1882.0749,-98.6923 2147.6199,-86.03"/>
+<polygon fill="#191970" stroke="#191970" points="522.5511,-261.3355 514.6054,-268.3437 525.1873,-267.8202 522.5511,-261.3355"/>
 </g>
 <!-- Node45&#45;&gt;Node47 -->
 <g id="edge15" class="edge">
 <title>Node45&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M1735.2034,-262.5705C1783.1354,-229.3657 1882.6215,-165.1892 1977,-134 2130.9357,-83.129 2616.9953,-37.8716 2805.8841,-21.7555"/>
-<polygon fill="#191970" stroke="#191970" points="1733.0593,-259.7989 1726.8662,-268.3952 1737.0683,-265.5372 1733.0593,-259.7989"/>
+<path fill="none" stroke="#191970" d="M506.4031,-262.2586C532.3963,-243.2093 573.2808,-216.0152 613,-201 776.6905,-139.1193 827.3797,-155.9044 1001,-134 1535.7356,-66.5364 2184.4352,-30.1597 2402.9914,-19.147"/>
+<polygon fill="#191970" stroke="#191970" points="504.1145,-259.5989 498.1821,-268.3771 508.2938,-265.2143 504.1145,-259.5989"/>
 </g>
 <!-- Node45&#45;&gt;Node48 -->
 <g id="edge16" class="edge">
 <title>Node45&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M1755.7973,-264.7561C1827.0326,-237.9428 1956.6221,-189.1647 2022.001,-164.5558"/>
-<polygon fill="#191970" stroke="#191970" points="1754.2717,-261.5905 1746.1457,-268.389 1756.7377,-268.1418 1754.2717,-261.5905"/>
+<path fill="none" stroke="#191970" d="M558.2737,-266.2371C664.1995,-243.2731 842.2105,-205.0783 873,-201 1031.9739,-179.9429 1512.0927,-159.7035 1699.707,-152.3812"/>
+<polygon fill="#191970" stroke="#191970" points="557.286,-262.8699 548.2552,-268.4103 558.7699,-269.7108 557.286,-262.8699"/>
 </g>
 <!-- Node46&#45;&gt;Node47 -->
 <g id="edge14" class="edge">
 <title>Node46&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M2755.6735,-63.865C2783.0667,-53.3773 2816.8283,-40.4515 2842.5553,-30.6017"/>
-<polygon fill="#191970" stroke="#191970" points="2754.3609,-60.6197 2746.2734,-67.4639 2756.8638,-67.157 2754.3609,-60.6197"/>
+<path fill="none" stroke="#191970" d="M2291.033,-64.8874C2331.6051,-54.2273 2382.8078,-40.774 2421.5234,-30.6017"/>
+<polygon fill="#191970" stroke="#191970" points="2290.0093,-61.5375 2281.227,-67.4639 2291.7882,-68.3077 2290.0093,-61.5375"/>
 </g>
 <!-- Node48&#45;&gt;Node46 -->
 <g id="edge17" class="edge">
 <title>Node48&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M2148.2734,-140.5383C2275.192,-127.3545 2510.2116,-102.9416 2630.9898,-90.3956"/>
-<polygon fill="#191970" stroke="#191970" points="2147.7892,-137.0696 2138.2043,-141.5842 2148.5125,-144.0322 2147.7892,-137.0696"/>
+<path fill="none" stroke="#191970" d="M1862.3772,-136.582C1945.0585,-124.2167 2068.5837,-105.7431 2147.7204,-93.9079"/>
+<polygon fill="#191970" stroke="#191970" points="1861.6232,-133.1557 1852.2508,-138.0964 1862.6586,-140.0787 1861.6232,-133.1557"/>
 </g>
 <!-- Node48&#45;&gt;Node47 -->
 <g id="edge18" class="edge">
 <title>Node48&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M2148.3819,-135.3839C2309.4581,-109.0617 2653.1545,-52.8967 2805.9989,-27.9197"/>
-<polygon fill="#191970" stroke="#191970" points="2147.3448,-132.0069 2138.0402,-137.0739 2148.4738,-138.9153 2147.3448,-132.0069"/>
+<path fill="none" stroke="#191970" d="M1846.0737,-131.972C1918.9502,-114.1206 2036.5019,-86.3834 2139,-67 2229.1918,-49.9439 2333.687,-34.7989 2402.7722,-25.4216"/>
+<polygon fill="#191970" stroke="#191970" points="1844.9698,-128.639 1836.0933,-134.4234 1846.6395,-135.437 1844.9698,-128.639"/>
 </g>
 <!-- Node49&#45;&gt;Node50 -->
 <g id="edge20" class="edge">
 <title>Node49&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M2350.9155,-265.8198C2390.016,-255.1704 2439.2817,-241.7525 2476.552,-231.6017"/>
-<polygon fill="#191970" stroke="#191970" points="2349.936,-262.459 2341.2072,-268.4639 2351.7755,-269.213 2349.936,-262.459"/>
+<path fill="none" stroke="#191970" d="M2046.5282,-264.3132C2070.0917,-253.9267 2098.784,-241.2794 2120.7395,-231.6017"/>
+<polygon fill="#191970" stroke="#191970" points="2044.8505,-261.2277 2037.1118,-268.4639 2047.674,-267.6331 2044.8505,-261.2277"/>
 </g>
 <!-- Node50&#45;&gt;Node46 -->
 <g id="edge21" class="edge">
 <title>Node50&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M2554.2838,-193.7535C2571.8652,-176.3961 2597.4397,-152.4141 2622,-134 2640.0846,-120.4411 2661.9185,-107.2733 2678.929,-97.6468"/>
-<polygon fill="#191970" stroke="#191970" points="2551.4084,-191.6781 2546.8007,-201.2186 2556.3522,-196.6339 2551.4084,-191.6781"/>
+<path fill="none" stroke="#191970" d="M2167.3946,-192.4294C2181.5368,-164.9647 2204.2402,-120.8742 2216.1415,-97.7614"/>
+<polygon fill="#191970" stroke="#191970" points="2164.2474,-190.8961 2162.7811,-201.389 2170.4708,-194.1007 2164.2474,-190.8961"/>
 </g>
 <!-- Node50&#45;&gt;Node48 -->
 <g id="edge22" class="edge">
 <title>Node50&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M2446.0086,-204.2416C2358.026,-191.6994 2222.6104,-172.3955 2138.3166,-160.3792"/>
-<polygon fill="#191970" stroke="#191970" points="2445.5701,-207.7144 2455.964,-205.6608 2446.5581,-200.7845 2445.5701,-207.7144"/>
+<path fill="none" stroke="#191970" d="M2068.9021,-201.2795C2003.64,-189.7424 1914.7945,-174.0362 1852.1186,-162.9563"/>
+<polygon fill="#191970" stroke="#191970" points="2068.303,-204.7278 2078.7596,-203.0221 2069.5216,-197.8347 2068.303,-204.7278"/>
 </g>
 <!-- Node53&#45;&gt;Node48 -->
 <g id="edge26" class="edge">
 <title>Node53&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M1164.8917,-264.5719C1213.3259,-245.0604 1293.1797,-215.505 1365,-201 1481.9284,-177.3849 1830.4177,-159.6645 1985.7805,-152.715"/>
-<polygon fill="#191970" stroke="#191970" points="1163.2879,-261.4457 1155.3416,-268.4534 1165.9235,-267.9306 1163.2879,-261.4457"/>
+<path fill="none" stroke="#191970" d="M1707.6871,-259.7882C1724.6823,-232.3502 1752.1684,-187.9751 1766.547,-164.7614"/>
+<polygon fill="#191970" stroke="#191970" points="1704.6501,-258.0446 1702.3598,-268.389 1710.6011,-261.7307 1704.6501,-258.0446"/>
 </g>
-<!-- Node144&#45;&gt;Node26 -->
+<!-- Node144&#45;&gt;Node23 -->
 <g id="edge34" class="edge">
-<title>Node144&#45;&gt;Node26</title>
-<path fill="none" stroke="#191970" d="M1333,-531.6079C1333,-519.214 1333,-503.8263 1333,-494.0817"/>
-<polygon fill="#191970" stroke="#191970" points="1329.5001,-531.9005 1333,-541.9005 1336.5001,-531.9006 1329.5001,-531.9005"/>
-</g>
-<!-- Node145&#45;&gt;Node23 -->
-<g id="edge36" class="edge">
-<title>Node145&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M1504.6515,-327.3509C1496.7089,-317.8482 1487.7364,-307.1132 1480.657,-298.6432"/>
-<polygon fill="#191970" stroke="#191970" points="1502.1941,-329.8684 1511.2927,-335.2967 1507.5651,-325.3792 1502.1941,-329.8684"/>
+<title>Node144&#45;&gt;Node23</title>
+<path fill="none" stroke="#191970" d="M345.5822,-334.967C443.5225,-316.1965 470.5177,-318.3348 564,-299 566.9336,-298.3932 569.9381,-297.7421 572.9658,-297.0626"/>
+<polygon fill="#191970" stroke="#191970" points="344.7067,-331.5715 335.557,-336.9133 346.0408,-338.4432 344.7067,-331.5715"/>
 </g>
-<!-- Node146&#45;&gt;Node49 -->
-<g id="edge43" class="edge">
-<title>Node146&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M2468.2429,-397.0839C2424.7197,-369.9579 2348.8337,-322.6615 2310.1567,-298.5558"/>
-<polygon fill="#191970" stroke="#191970" points="2466.4168,-400.0699 2476.7547,-402.389 2470.1193,-394.1293 2466.4168,-400.0699"/>
-</g>
-<!-- Node146&#45;&gt;Node50 -->
-<g id="edge55" class="edge">
-<title>Node146&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M2471.3607,-396.2277C2434.312,-366.9638 2379.1779,-312.8171 2409,-268 2420.4783,-250.7503 2438.9646,-239.2672 2458.1559,-231.627"/>
-<polygon fill="#191970" stroke="#191970" points="2469.3527,-399.0989 2479.4118,-402.4256 2473.6228,-393.5521 2469.3527,-399.0989"/>
-</g>
-<!-- Node146&#45;&gt;Node147 -->
-<g id="edge40" class="edge">
-<title>Node146&#45;&gt;Node147</title>
-<path fill="none" stroke="#191970" d="M2587.3888,-405.7952C2638.6858,-397.1728 2697.2785,-383.8439 2716,-366 2734.5721,-348.2985 2739.0219,-317.1416 2739.9425,-298.6007"/>
-<polygon fill="#191970" stroke="#191970" points="2586.5437,-402.3869 2577.2407,-407.4569 2587.6749,-409.2949 2586.5437,-402.3869"/>
+<!-- Node145&#45;&gt;Node49 -->
+<g id="edge41" class="edge">
+<title>Node145&#45;&gt;Node49</title>
+<path fill="none" stroke="#191970" d="M2121.3731,-395.2042C2092.4389,-367.9001 2044.1999,-322.3788 2019.1725,-298.7614"/>
+<polygon fill="#191970" stroke="#191970" points="2119.3117,-398.0713 2128.9868,-402.389 2124.116,-392.9802 2119.3117,-398.0713"/>
 </g>
-<!-- Node146&#45;&gt;Node148 -->
-<g id="edge44" class="edge">
-<title>Node146&#45;&gt;Node148</title>
-<path fill="none" stroke="#191970" d="M2531.6246,-396.3469C2546.0578,-386.3776 2562.9467,-374.7121 2576.0763,-365.6432"/>
-<polygon fill="#191970" stroke="#191970" points="2529.2497,-393.7335 2523.0108,-402.2967 2533.228,-399.4932 2529.2497,-393.7335"/>
+<!-- Node145&#45;&gt;Node50 -->
+<g id="edge53" class="edge">
+<title>Node145&#45;&gt;Node50</title>
+<path fill="none" stroke="#191970" d="M2146.2513,-392.348C2148.3579,-350.0061 2152.5491,-265.7637 2154.2533,-231.5088"/>
+<polygon fill="#191970" stroke="#191970" points="2142.7529,-392.2307 2145.7516,-402.3923 2149.7443,-392.5786 2142.7529,-392.2307"/>
 </g>
-<!-- Node146&#45;&gt;Node150 -->
-<g id="edge49" class="edge">
-<title>Node146&#45;&gt;Node150</title>
-<path fill="none" stroke="#191970" d="M2587.1231,-402.355C2652.9309,-390.7825 2742.7778,-374.9826 2805.9467,-363.8742"/>
-<polygon fill="#191970" stroke="#191970" points="2586.428,-398.9234 2577.1853,-404.1026 2587.6404,-405.8177 2586.428,-398.9234"/>
+<!-- Node145&#45;&gt;Node146 -->
+<g id="edge38" class="edge">
+<title>Node145&#45;&gt;Node146</title>
+<path fill="none" stroke="#191970" d="M2231.2318,-403.1329C2283.3657,-393.3712 2344.0542,-379.7732 2367,-366 2395.1981,-349.0741 2418.1657,-317.3485 2430.0946,-298.5556"/>
+<polygon fill="#191970" stroke="#191970" points="2230.4694,-399.7144 2221.2708,-404.9715 2231.74,-406.5981 2230.4694,-399.7144"/>
 </g>
-<!-- Node147&#45;&gt;Node47 -->
+<!-- Node145&#45;&gt;Node147 -->
 <g id="edge42" class="edge">
-<title>Node147&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M2752.762,-259.414C2781.4956,-205.1844 2850.5828,-74.7944 2874.0468,-30.5103"/>
-<polygon fill="#191970" stroke="#191970" points="2749.6307,-257.8483 2748.0414,-268.3233 2755.8161,-261.1257 2749.6307,-257.8483"/>
+<title>Node145&#45;&gt;Node147</title>
+<path fill="none" stroke="#191970" d="M2231.3492,-403.9193C2308.6107,-391.7678 2420.7829,-374.1257 2494.7821,-362.4873"/>
+<polygon fill="#191970" stroke="#191970" points="2230.495,-400.5105 2221.1603,-405.5217 2231.5826,-407.4255 2230.495,-400.5105"/>
 </g>
-<!-- Node147&#45;&gt;Node50 -->
-<g id="edge41" class="edge">
-<title>Node147&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M2683.4237,-265.2759C2650.6423,-254.7165 2609.8618,-241.5805 2578.8829,-231.6017"/>
-<polygon fill="#191970" stroke="#191970" points="2682.7292,-268.7292 2693.3207,-268.4639 2684.8755,-262.0664 2682.7292,-268.7292"/>
-</g>
-<!-- Node148&#45;&gt;Node46 -->
+<!-- Node145&#45;&gt;Node149 -->
 <g id="edge47" class="edge">
-<title>Node148&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M2608.0717,-325.7364C2630.2908,-271.106 2682.9586,-141.6111 2700.8951,-97.5103"/>
-<polygon fill="#191970" stroke="#191970" points="2604.6981,-324.7415 2604.1726,-335.3233 2611.1823,-327.3788 2604.6981,-324.7415"/>
+<title>Node145&#45;&gt;Node149</title>
+<path fill="none" stroke="#191970" d="M2177.139,-396.7951C2192.7223,-386.7558 2211.1061,-374.9124 2225.3784,-365.7177"/>
+<polygon fill="#191970" stroke="#191970" points="2174.8506,-394.1058 2168.3396,-402.4639 2178.6417,-399.9904 2174.8506,-394.1058"/>
 </g>
-<!-- Node148&#45;&gt;Node50 -->
-<g id="edge48" class="edge">
-<title>Node148&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M2595.5628,-325.15C2593.1361,-308.2031 2588.3523,-285.8311 2579,-268 2571.6909,-254.0645 2559.685,-241.0017 2549.6374,-231.5189"/>
-<polygon fill="#191970" stroke="#191970" points="2592.1305,-325.911 2596.8414,-335.4009 2599.0767,-325.0445 2592.1305,-325.911"/>
+<!-- Node146&#45;&gt;Node47 -->
+<g id="edge40" class="edge">
+<title>Node146&#45;&gt;Node47</title>
+<path fill="none" stroke="#191970" d="M2453.7433,-259.3572C2466.9965,-236.1383 2485.4958,-199.5644 2493,-165 2502.241,-122.436 2498.7635,-110.1725 2493,-67 2491.3326,-54.5098 2487.4856,-40.7558 2484.2455,-30.6058"/>
+<polygon fill="#191970" stroke="#191970" points="2450.6269,-257.7532 2448.6065,-268.1536 2456.6717,-261.2832 2450.6269,-257.7532"/>
 </g>
-<!-- Node148&#45;&gt;Node147 -->
+<!-- Node146&#45;&gt;Node50 -->
+<g id="edge39" class="edge">
+<title>Node146&#45;&gt;Node50</title>
+<path fill="none" stroke="#191970" d="M2365.4875,-266.1573C2320.123,-255.4551 2262.4964,-241.8601 2219.0131,-231.6017"/>
+<polygon fill="#191970" stroke="#191970" points="2364.7284,-269.5742 2375.2649,-268.4639 2366.3357,-262.7612 2364.7284,-269.5742"/>
+</g>
+<!-- Node147&#45;&gt;Node46 -->
 <g id="edge45" class="edge">
-<title>Node148&#45;&gt;Node147</title>
-<path fill="none" stroke="#191970" d="M2638.9253,-331.1902C2660.8011,-320.8685 2687.3496,-308.3421 2707.7475,-298.7177"/>
-<polygon fill="#191970" stroke="#191970" points="2637.4179,-328.0313 2629.8676,-335.4639 2640.405,-334.362 2637.4179,-328.0313"/>
+<title>Node147&#45;&gt;Node46</title>
+<path fill="none" stroke="#191970" d="M2570.0543,-325.162C2568.2393,-307.5853 2563.4219,-284.466 2551,-268 2511.5936,-215.7645 2333.3295,-131.564 2257.8874,-97.5103"/>
+<polygon fill="#191970" stroke="#191970" points="2566.5762,-325.5807 2570.8376,-335.2807 2573.5553,-325.0404 2566.5762,-325.5807"/>
 </g>
-<!-- Node148&#45;&gt;Node149 -->
+<!-- Node147&#45;&gt;Node50 -->
 <g id="edge46" class="edge">
-<title>Node148&#45;&gt;Node149</title>
-<path fill="none" stroke="#191970" d="M2565.861,-329.7951C2550.2777,-319.7558 2531.8939,-307.9124 2517.6216,-298.7177"/>
-<polygon fill="#191970" stroke="#191970" points="2564.3583,-332.9904 2574.6604,-335.4639 2568.1494,-327.1058 2564.3583,-332.9904"/>
+<title>Node147&#45;&gt;Node50</title>
+<path fill="none" stroke="#191970" d="M2563.6724,-325.9533C2556.6602,-306.9511 2544.1235,-281.6439 2524,-268 2477.6227,-236.5558 2325.0154,-224.0848 2231.2029,-219.3079"/>
+<polygon fill="#191970" stroke="#191970" points="2560.3836,-327.1529 2566.9299,-335.4834 2567.0073,-324.8889 2560.3836,-327.1529"/>
 </g>
-<!-- Node150&#45;&gt;Node46 -->
-<g id="edge51" class="edge">
-<title>Node150&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M2884.7544,-325.0184C2885.5578,-308.1823 2884.6303,-285.9882 2877,-268 2844.5036,-191.3908 2766.2343,-126.0779 2728.3098,-97.6712"/>
-<polygon fill="#191970" stroke="#191970" points="2881.2479,-324.9872 2884.0295,-335.2103 2888.2302,-325.4838 2881.2479,-324.9872"/>
+<!-- Node147&#45;&gt;Node146 -->
+<g id="edge43" class="edge">
+<title>Node147&#45;&gt;Node146</title>
+<path fill="none" stroke="#191970" d="M2532.411,-330.9132C2512.1813,-320.645 2487.7733,-308.2561 2468.9812,-298.7177"/>
+<polygon fill="#191970" stroke="#191970" points="2530.8754,-334.0587 2541.3766,-335.4639 2534.0437,-327.8168 2530.8754,-334.0587"/>
 </g>
-<!-- Node150&#45;&gt;Node47 -->
-<g id="edge53" class="edge">
-<title>Node150&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M2889.0473,-325.4573C2891.0694,-317.113 2892.9973,-307.7214 2894,-299 2895.5736,-285.3124 2894.3271,-281.7739 2894,-268 2891.8588,-177.8391 2885.4226,-69.7221 2882.9667,-30.5751"/>
-<polygon fill="#191970" stroke="#191970" points="2885.6075,-324.7799 2886.4982,-335.3372 2892.3856,-326.5287 2885.6075,-324.7799"/>
+<!-- Node147&#45;&gt;Node148 -->
+<g id="edge44" class="edge">
+<title>Node147&#45;&gt;Node148</title>
+<path fill="none" stroke="#191970" d="M2605.2747,-330.3561C2622.5561,-320.1995 2643.17,-308.0843 2659.1071,-298.7177"/>
+<polygon fill="#191970" stroke="#191970" points="2603.4317,-327.3795 2596.5838,-335.4639 2606.9786,-333.4144 2603.4317,-327.3795"/>
 </g>
-<!-- Node150&#45;&gt;Node48 -->
-<g id="edge54" class="edge">
-<title>Node150&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M2879.6668,-325.4727C2876.6581,-307.1532 2869.77,-282.9753 2854,-268 2774.6249,-192.6248 2724.6134,-221.0296 2617,-201 2447.661,-169.4818 2245.489,-156.8954 2138.011,-152.1516"/>
-<polygon fill="#191970" stroke="#191970" points="2876.2105,-326.0317 2881.0278,-335.4681 2883.1465,-325.0873 2876.2105,-326.0317"/>
+<!-- Node149&#45;&gt;Node46 -->
+<g id="edge49" class="edge">
+<title>Node149&#45;&gt;Node46</title>
+<path fill="none" stroke="#191970" d="M2248.0338,-324.8422C2246.7771,-295.0268 2244.2276,-244.3709 2240,-201 2236.3748,-163.809 2229.9374,-120.3332 2226.4347,-97.7973"/>
+<polygon fill="#191970" stroke="#191970" points="2244.551,-325.3401 2248.453,-335.1902 2251.5453,-325.0567 2244.551,-325.3401"/>
+</g>
+<!-- Node149&#45;&gt;Node47 -->
+<g id="edge51" class="edge">
+<title>Node149&#45;&gt;Node47</title>
+<path fill="none" stroke="#191970" d="M2265.1956,-326.9108C2309.4882,-262.3976 2431.7677,-84.2949 2468.4582,-30.8544"/>
+<polygon fill="#191970" stroke="#191970" points="2262.255,-325.0102 2259.4803,-335.2352 2268.0258,-328.9723 2262.255,-325.0102"/>
 </g>
-<!-- Node150&#45;&gt;Node50 -->
+<!-- Node149&#45;&gt;Node48 -->
 <g id="edge52" class="edge">
-<title>Node150&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M2871.5774,-326.0025C2862.2084,-307.0738 2846.5909,-281.842 2825,-268 2789.9445,-245.5258 2682.8022,-230.9311 2608.294,-223.2062"/>
-<polygon fill="#191970" stroke="#191970" points="2868.4911,-327.6648 2875.9057,-335.2327 2874.8289,-324.6928 2868.4911,-327.6648"/>
+<title>Node149&#45;&gt;Node48</title>
+<path fill="none" stroke="#191970" d="M2212.7613,-330.4714C2180.4918,-312.9901 2131.7535,-287.4478 2088,-268 1994.4157,-226.4033 1881.3299,-185.7576 1820.2824,-164.5786"/>
+<polygon fill="#191970" stroke="#191970" points="2211.3588,-333.6929 2221.8154,-335.3991 2214.7051,-327.5445 2211.3588,-333.6929"/>
 </g>
-<!-- Node150&#45;&gt;Node147 -->
+<!-- Node149&#45;&gt;Node50 -->
 <g id="edge50" class="edge">
-<title>Node150&#45;&gt;Node147</title>
-<path fill="none" stroke="#191970" d="M2841.0747,-331.1902C2819.1989,-320.8685 2792.6504,-308.3421 2772.2525,-298.7177"/>
-<polygon fill="#191970" stroke="#191970" points="2839.595,-334.362 2850.1324,-335.4639 2842.5821,-328.0313 2839.595,-334.362"/>
+<title>Node149&#45;&gt;Node50</title>
+<path fill="none" stroke="#191970" d="M2234.9985,-326.6074C2224.9309,-309.8601 2210.7581,-287.1387 2197,-268 2187.9761,-255.4469 2176.8916,-241.925 2168.3762,-231.8879"/>
+<polygon fill="#191970" stroke="#191970" points="2232.0147,-328.4375 2240.1406,-335.2361 2238.028,-324.854 2232.0147,-328.4375"/>
+</g>
+<!-- Node149&#45;&gt;Node146 -->
+<g id="edge48" class="edge">
+<title>Node149&#45;&gt;Node146</title>
+<path fill="none" stroke="#191970" d="M2301.4563,-332.0023C2331.2664,-321.4903 2368.1202,-308.4945 2396.1743,-298.6017"/>
+<polygon fill="#191970" stroke="#191970" points="2299.9065,-328.8374 2291.6397,-335.4639 2302.2345,-335.439 2299.9065,-328.8374"/>
 </g>
 <!-- Node80&#45;&gt;Node28 -->
-<g id="edge73" class="edge">
+<g id="edge72" class="edge">
 <title>Node80&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M414.4056,-531.7835C415.6717,-513.9146 419.8674,-487.6071 433,-469 501.7168,-371.6374 639.3387,-314.4494 699.5993,-293.1255"/>
-<polygon fill="#191970" stroke="#191970" points="410.9031,-531.757 413.9369,-541.9081 417.8956,-532.0807 410.9031,-531.757"/>
+<path fill="none" stroke="#191970" d="M1004.1938,-543.6794C912.8885,-533.96 767.3062,-516.6196 715,-500 653.0519,-480.3169 618.8337,-488.5002 585,-433 542.0905,-362.6119 675.7914,-312.8828 742.5069,-293.0518"/>
+<polygon fill="#191970" stroke="#191970" points="1003.913,-547.1691 1014.2256,-544.7399 1004.649,-540.2079 1003.913,-547.1691"/>
 </g>
 <!-- Node80&#45;&gt;Node54 -->
-<g id="edge72" class="edge">
+<g id="edge71" class="edge">
 <title>Node80&#45;&gt;Node54</title>
-<path fill="none" stroke="#191970" d="M407.9409,-532.0296C405.1608,-522.4625 402.0373,-510.7137 400,-500 384.9522,-420.8677 378.3302,-323.5388 376.5302,-293.0749"/>
-<polygon fill="#191970" stroke="#191970" points="404.627,-533.1637 410.8577,-541.7328 411.3307,-531.1486 404.627,-533.1637"/>
+<path fill="none" stroke="#191970" d="M1009.5012,-538.917C987.6803,-531.3719 966.0785,-519.3035 953,-500 937.198,-476.6767 960.5553,-437.9159 978,-366 984.3926,-339.6467 991.9278,-308.5567 995.6608,-293.1531"/>
+<polygon fill="#191970" stroke="#191970" points="1008.586,-542.2984 1019.176,-541.9782 1010.6978,-535.6245 1008.586,-542.2984"/>
 </g>
 <!-- Node80&#45;&gt;Node82 -->
-<g id="edge67" class="edge">
+<g id="edge66" class="edge">
 <title>Node80&#45;&gt;Node82</title>
-<path fill="none" stroke="#191970" d="M395.2802,-535.03C376.1955,-518.2893 345.6855,-491.6656 319,-469 301.7225,-454.3252 281.4838,-437.5999 268.9505,-427.2959"/>
-<polygon fill="#191970" stroke="#191970" points="393.1293,-537.7991 402.9528,-541.7678 397.7483,-532.5393 393.1293,-537.7991"/>
+<path fill="none" stroke="#191970" d="M1017.1717,-538.7562C995.9481,-531.1747 974.6035,-519.1242 962,-500 954.4183,-488.4958 954.49,-480.5511 962,-469 976.2926,-447.0168 1002.3627,-434.343 1026.1727,-427.0739"/>
+<polygon fill="#191970" stroke="#191970" points="1016.2902,-542.1503 1026.8828,-541.9345 1018.4676,-535.4975 1016.2902,-542.1503"/>
 </g>
 <!-- Node80&#45;&gt;Node81 -->
-<g id="edge65" class="edge">
+<g id="edge64" class="edge">
 <title>Node80&#45;&gt;Node81</title>
-<path fill="none" stroke="#191970" d="M380.1511,-538.0806C347.3005,-525.057 298.2631,-505.6162 269.1687,-494.0817"/>
-<polygon fill="#191970" stroke="#191970" points="379.2004,-541.4687 389.7864,-541.9005 381.7802,-534.9614 379.2004,-541.4687"/>
+<path fill="none" stroke="#191970" d="M1114.2465,-537.9118C1145.1502,-524.8895 1191.0065,-505.5664 1218.2614,-494.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1112.637,-534.792 1104.7808,-541.9005 1115.3552,-541.2427 1112.637,-534.792"/>
 </g>
 <!-- Node80&#45;&gt;Node90 -->
-<g id="edge68" class="edge">
+<g id="edge67" class="edge">
 <title>Node80&#45;&gt;Node90</title>
-<path fill="none" stroke="#191970" d="M463.0091,-539.5596C516.2916,-526.578 600.3971,-506.0869 649.5809,-494.1039"/>
-<polygon fill="#191970" stroke="#191970" points="461.996,-536.204 453.1087,-541.9717 463.6531,-543.005 461.996,-536.204"/>
+<path fill="none" stroke="#191970" d="M1071.9186,-532.7374C1065.1613,-520.1614 1056.5446,-504.1246 1051.1484,-494.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1069.0257,-534.7482 1076.8421,-541.9005 1075.192,-531.435 1069.0257,-534.7482"/>
 </g>
 <!-- Node91 -->
 <g id="node35" class="node">
 <title>Node91</title>
 <g id="a_node35"><a xlink:href="relay_2function_8h.html" target="_top" xlink:title="Relay Function. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="442,-475 442,-494 596,-494 596,-475 442,-475"/>
-<text text-anchor="middle" x="519" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/function.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="724,-475 724,-494 878,-494 878,-475 724,-475"/>
+<text text-anchor="middle" x="801" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/relay/function.h</text>
 </a>
 </g>
 </g>
 <!-- Node80&#45;&gt;Node91 -->
-<g id="edge69" class="edge">
+<g id="edge68" class="edge">
 <title>Node80&#45;&gt;Node91</title>
-<path fill="none" stroke="#191970" d="M437.7239,-536.3619C458.044,-523.3957 486.6867,-505.119 503.984,-494.0817"/>
-<polygon fill="#191970" stroke="#191970" points="435.5912,-533.5708 429.0439,-541.9005 439.3567,-539.4719 435.5912,-533.5708"/>
+<path fill="none" stroke="#191970" d="M1032.2608,-539.6405C977.834,-526.6633 891.6406,-506.1118 841.2791,-494.1039"/>
+<polygon fill="#191970" stroke="#191970" points="1031.4989,-543.0568 1042.038,-541.9717 1033.1224,-536.2477 1031.4989,-543.0568"/>
 </g>
 <!-- Node81&#45;&gt;Node82 -->
-<g id="edge66" class="edge">
+<g id="edge65" class="edge">
 <title>Node81&#45;&gt;Node82</title>
-<path fill="none" stroke="#191970" d="M248.495,-464.9863C250.7262,-452.5286 253.5208,-436.9258 255.2839,-427.0817"/>
-<polygon fill="#191970" stroke="#191970" points="245.0372,-464.4401 246.7193,-474.9005 251.9276,-465.6742 245.0372,-464.4401"/>
+<path fill="none" stroke="#191970" d="M1208.3528,-471.0806C1176.6685,-458.057 1129.3721,-438.6162 1101.3106,-427.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1207.0663,-474.3359 1217.6461,-474.9005 1209.7276,-467.8615 1207.0663,-474.3359"/>
 </g>
 <!-- Node91&#45;&gt;Node28 -->
-<g id="edge71" class="edge">
+<g id="edge70" class="edge">
 <title>Node91&#45;&gt;Node28</title>
-<path fill="none" stroke="#191970" d="M536.4669,-467.7817C578.777,-427.2849 685.7177,-324.9273 718.9729,-293.0974"/>
-<polygon fill="#191970" stroke="#191970" points="533.9328,-465.3623 529.1287,-474.8054 538.7731,-470.4192 533.9328,-465.3623"/>
+<path fill="none" stroke="#191970" d="M798.7272,-464.6373C793.8736,-422.2215 782.6496,-324.1332 779.0982,-293.0974"/>
+<polygon fill="#191970" stroke="#191970" points="795.2764,-465.2681 799.8907,-474.8054 802.231,-464.4723 795.2764,-465.2681"/>
 </g>
 <!-- Node91&#45;&gt;Node82 -->
-<g id="edge70" class="edge">
+<g id="edge69" class="edge">
 <title>Node91&#45;&gt;Node82</title>
-<path fill="none" stroke="#191970" d="M471.9907,-472.4785C421.2109,-459.4929 341.317,-439.062 294.5556,-427.1039"/>
-<polygon fill="#191970" stroke="#191970" points="471.1846,-475.885 481.74,-474.9717 472.919,-469.1032 471.1846,-475.885"/>
+<path fill="none" stroke="#191970" d="M850.3655,-472.5596C904.0356,-459.578 988.7527,-439.0869 1038.2942,-427.1039"/>
+<polygon fill="#191970" stroke="#191970" points="849.29,-469.2187 840.3932,-474.9717 850.9358,-476.0225 849.29,-469.2187"/>
 </g>
 <!-- Node160&#45;&gt;Node22 -->
-<g id="edge83" class="edge">
+<g id="edge82" class="edge">
 <title>Node160&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M2280.5583,-674.857C2099.7785,-663.0041 1656.4561,-633.9377 1487.7008,-622.8732"/>
-<polygon fill="#191970" stroke="#191970" points="2280.5782,-678.3657 2290.7858,-675.5275 2281.0362,-671.3807 2280.5782,-678.3657"/>
+<path fill="none" stroke="#191970" d="M2041.8849,-674.9001C1860.6705,-663.0692 1414.2436,-633.9235 1244.7443,-622.8575"/>
+<polygon fill="#191970" stroke="#191970" points="2041.6679,-678.3933 2051.8747,-675.5523 2042.124,-671.4082 2041.6679,-678.3933"/>
 </g>
 <!-- Node160&#45;&gt;Node91 -->
-<g id="edge84" class="edge">
+<g id="edge83" class="edge">
 <title>Node160&#45;&gt;Node91</title>
-<path fill="none" stroke="#191970" d="M2280.7445,-678.0536C1998.286,-670.4866 1032.276,-639.7621 733,-567 658.7134,-548.9389 575.4817,-511.5632 538.5088,-494.0042"/>
-<polygon fill="#191970" stroke="#191970" points="2280.811,-681.5565 2290.9005,-678.3236 2280.9971,-674.559 2280.811,-681.5565"/>
+<path fill="none" stroke="#191970" d="M2041.3694,-673.1661C1795.3067,-651.6312 1053.4625,-585.5164 1005,-567 982.992,-558.5912 983.0358,-546.6092 962,-536 923.613,-516.6399 876.4356,-502.5724 843.0999,-494.0802"/>
+<polygon fill="#191970" stroke="#191970" points="2041.3951,-676.6816 2051.662,-674.0662 2042.005,-669.7082 2041.3951,-676.6816"/>
 </g>
 <!-- Node161 -->
 <g id="node39" class="node">
 <title>Node161</title>
 <g id="a_node39"><a xlink:href="script_2ir__builder_2base_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/base.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="2338,-603.5 2338,-633.5 2442,-633.5 2442,-603.5 2338,-603.5"/>
-<text text-anchor="start" x="2346" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
-<text text-anchor="middle" x="2390" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/base.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2166,-603.5 2166,-633.5 2270,-633.5 2270,-603.5 2166,-603.5"/>
+<text text-anchor="start" x="2174" y="-621.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
+<text text-anchor="middle" x="2218" y="-610.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/base.h</text>
 </a>
 </g>
 </g>
 <!-- Node160&#45;&gt;Node161 -->
-<g id="edge85" class="edge">
+<g id="edge84" class="edge">
 <title>Node160&#45;&gt;Node161</title>
-<path fill="none" stroke="#191970" d="M2368.4596,-661.2334C2372.9356,-652.3537 2378.2082,-641.8934 2382.4165,-633.5446"/>
-<polygon fill="#191970" stroke="#191970" points="2365.2196,-659.8855 2363.8438,-670.3906 2371.4704,-663.0363 2365.2196,-659.8855"/>
+<path fill="none" stroke="#191970" d="M2144.1343,-664.8545C2159.3286,-655.3193 2178.863,-643.0605 2194.0265,-633.5446"/>
+<polygon fill="#191970" stroke="#191970" points="2141.9223,-662.1105 2135.3125,-670.3906 2145.6432,-668.0397 2141.9223,-662.1105"/>
 </g>
 <!-- Node162 -->
 <g id="node40" class="node">
 <title>Node162</title>
 <g id="a_node40"><a xlink:href="ir_2frame_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/frame.h">
-<polygon fill="#ffffff" stroke="#ff0000" points="2329,-536.5 2329,-566.5 2445,-566.5 2445,-536.5 2329,-536.5"/>
-<text text-anchor="start" x="2337" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
-<text text-anchor="middle" x="2387" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/frame.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="2128,-536.5 2128,-566.5 2244,-566.5 2244,-536.5 2128,-536.5"/>
+<text text-anchor="start" x="2136" y="-554.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
+<text text-anchor="middle" x="2186" y="-543.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/frame.h</text>
 </a>
 </g>
 </g>
 <!-- Node160&#45;&gt;Node162 -->
-<g id="edge88" class="edge">
+<g id="edge87" class="edge">
 <title>Node160&#45;&gt;Node162</title>
-<path fill="none" stroke="#191970" d="M2343.7753,-662.0933C2332.3361,-646.3503 2320.053,-622.9094 2329,-603 2335.7893,-587.8922 2349.4438,-575.4132 2361.7992,-566.5246"/>
-<polygon fill="#191970" stroke="#191970" points="2341.3356,-664.6563 2350.2461,-670.3881 2346.8549,-660.3507 2341.3356,-664.6563"/>
+<path fill="none" stroke="#191970" d="M2128.5923,-661.2542C2135.8856,-645.5578 2146.7672,-622.6364 2157,-603 2163.4491,-590.6245 2171.2306,-576.8633 2177.1087,-566.6814"/>
+<polygon fill="#191970" stroke="#191970" points="2125.3753,-659.8723 2124.3606,-670.4184 2131.7305,-662.8069 2125.3753,-659.8723"/>
 </g>
 <!-- Node163 -->
 <g id="node41" class="node">
 <title>Node163</title>
 <g id="a_node41"><a xlink:href="ir_2ir_8h.html" target="_top" xlink:title="include/tvm/script\l/ir_builder/ir/ir.h">
-<polygon fill="#ffffff" stroke="#000000" points="2346,-469.5 2346,-499.5 2450,-499.5 2450,-469.5 2346,-469.5"/>
-<text text-anchor="start" x="2354" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
-<text text-anchor="middle" x="2398" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/ir.h</text>
+<polygon fill="#ffffff" stroke="#000000" points="2103,-469.5 2103,-499.5 2207,-499.5 2207,-469.5 2103,-469.5"/>
+<text text-anchor="start" x="2111" y="-487.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/script</text>
+<text text-anchor="middle" x="2155" y="-476.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">/ir_builder/ir/ir.h</text>
 </a>
 </g>
 </g>
 <!-- Node160&#45;&gt;Node163 -->
-<g id="edge89" class="edge">
+<g id="edge88" class="edge">
 <title>Node160&#45;&gt;Node163</title>
-<path fill="none" stroke="#191970" d="M2408.0607,-666.6697C2424.4646,-659.6786 2441.0813,-649.2739 2451,-634 2474.7328,-597.4539 2471.5142,-575.9013 2454,-536 2447.4702,-521.1236 2434.2875,-508.6537 2422.3524,-499.7121"/>
-<polygon fill="#191970" stroke="#191970" points="2406.5265,-663.5111 2398.4791,-670.4025 2409.0676,-670.0336 2406.5265,-663.5111"/>
+<path fill="none" stroke="#191970" d="M2116.0891,-660.0628C2111.3661,-631.7333 2105.6037,-578.5464 2119,-536 2123.2591,-522.4731 2132.476,-509.404 2140.5171,-499.816"/>
+<polygon fill="#191970" stroke="#191970" points="2112.7095,-661.0441 2117.9293,-670.2639 2119.5984,-659.8014 2112.7095,-661.0441"/>
 </g>
 <!-- Node165 -->
 <g id="node42" class="node">
 <title>Node165</title>
 <g id="a_node42"><a xlink:href="tir_2function_8h.html" target="_top" xlink:title="TIR Function. ">
-<polygon fill="#ffffff" stroke="#ff0000" points="2019,-475 2019,-494 2159,-494 2159,-475 2019,-475"/>
-<text text-anchor="middle" x="2089" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/function.h</text>
+<polygon fill="#ffffff" stroke="#ff0000" points="1628,-475 1628,-494 1768,-494 1768,-475 1628,-475"/>
+<text text-anchor="middle" x="1698" y="-482" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">include/tvm/tir/function.h</text>
 </a>
 </g>
 </g>
 <!-- Node160&#45;&gt;Node165 -->
-<g id="edge90" class="edge">
+<g id="edge89" class="edge">
 <title>Node160&#45;&gt;Node165</title>
-<path fill="none" stroke="#191970" d="M2338.7851,-664.0382C2304.2914,-636.966 2231.6165,-580.6391 2168,-536 2146.2888,-520.7654 2120.4201,-504.1889 2104.3501,-494.0695"/>
-<polygon fill="#191970" stroke="#191970" points="2336.6793,-666.8348 2346.7033,-670.2657 2341.0067,-661.3326 2336.6793,-666.8348"/>
+<path fill="none" stroke="#191970" d="M2090.2242,-666.2058C2009.4055,-628.7649 1787.394,-525.9136 1718.5999,-494.0433"/>
+<polygon fill="#191970" stroke="#191970" points="2088.8155,-669.4105 2099.3604,-670.4383 2091.758,-663.0589 2088.8155,-669.4105"/>
 </g>
 <!-- Node161&#45;&gt;Node162 -->
-<g id="edge86" class="edge">
+<g id="edge85" class="edge">
 <title>Node161&#45;&gt;Node162</title>
-<path fill="none" stroke="#191970" d="M2388.8593,-593.0249C2388.461,-584.128 2388.0267,-574.4287 2387.6781,-566.6432"/>
-<polygon fill="#191970" stroke="#191970" points="2385.3754,-593.4633 2389.3193,-603.2967 2392.3683,-593.1501 2385.3754,-593.4633"/>
+<path fill="none" stroke="#191970" d="M2206.3908,-594.1932C2201.9925,-584.9844 2197.1145,-574.771 2193.2326,-566.6432"/>
+<polygon fill="#191970" stroke="#191970" points="2203.2706,-595.7815 2210.7387,-603.2967 2209.5871,-592.7646 2203.2706,-595.7815"/>
 </g>
 <!-- Node162&#45;&gt;Node163 -->
-<g id="edge87" class="edge">
+<g id="edge86" class="edge">
 <title>Node162&#45;&gt;Node163</title>
-<path fill="none" stroke="#191970" d="M2391.1344,-526.3179C2392.6081,-517.3414 2394.2215,-507.5143 2395.5138,-499.6432"/>
-<polygon fill="#191970" stroke="#191970" points="2387.6625,-525.8617 2389.4961,-536.2967 2394.57,-526.9958 2387.6625,-525.8617"/>
+<path fill="none" stroke="#191970" d="M2174.7536,-527.1932C2170.4928,-517.9844 2165.7672,-507.771 2162.0065,-499.6432"/>
+<polygon fill="#191970" stroke="#191970" points="2171.5899,-528.6908 2178.9656,-536.2967 2177.9429,-525.7513 2171.5899,-528.6908"/>
 </g>
 <!-- Node165&#45;&gt;Node23 -->
-<g id="edge91" class="edge">
+<g id="edge90" class="edge">
 <title>Node165&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M2107.7802,-467.8512C2125.775,-449.9334 2147.772,-421.4008 2130,-402 2099.11,-368.2788 1764.8012,-375.1741 1720,-366 1641.5277,-349.9309 1553.311,-317.5928 1504.8484,-298.5249"/>
-<polygon fill="#191970" stroke="#191970" points="2105.2808,-465.3981 2100.4408,-474.8228 2110.1018,-470.4734 2105.2808,-465.3981"/>
+<path fill="none" stroke="#191970" d="M1693.8001,-464.9712C1688.4421,-445.3357 1677.0904,-416.0234 1655,-402 1572.4756,-349.612 870.1578,-392.2504 776,-366 725.6132,-351.9526 673.6098,-318.2262 646.0283,-298.5191"/>
+<polygon fill="#191970" stroke="#191970" points="1690.4108,-465.846 1696.1902,-474.7257 1697.2097,-464.18 1690.4108,-465.846"/>
 </g>
 <!-- Node165&#45;&gt;Node95 -->
-<g id="edge93" class="edge">
+<g id="edge92" class="edge">
 <title>Node165&#45;&gt;Node95</title>
-<path fill="none" stroke="#191970" d="M2046.7279,-472.0779C2002.4547,-459.0678 1933.8731,-438.9145 1893.6062,-427.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2045.752,-475.4391 2056.3332,-474.9005 2047.7256,-468.7231 2045.752,-475.4391"/>
+<path fill="none" stroke="#191970" d="M1643.5091,-472.7984C1583.0308,-459.8111 1486.5781,-439.0985 1430.4699,-427.0496"/>
+<polygon fill="#191970" stroke="#191970" points="1643.1174,-476.294 1653.6293,-474.9717 1644.5871,-469.4501 1643.1174,-476.294"/>
 </g>
-<!-- Node165&#45;&gt;Node146 -->
-<g id="edge92" class="edge">
-<title>Node165&#45;&gt;Node146</title>
-<path fill="none" stroke="#191970" d="M2157.7004,-473.3278C2231.4257,-461.3385 2348.2245,-442.3446 2424.7437,-429.9009"/>
-<polygon fill="#191970" stroke="#191970" points="2156.9005,-469.9119 2147.592,-474.9717 2158.0242,-476.8211 2156.9005,-469.9119"/>
+<!-- Node165&#45;&gt;Node145 -->
+<g id="edge91" class="edge">
+<title>Node165&#45;&gt;Node145</title>
+<path fill="none" stroke="#191970" d="M1771.5732,-473.4723C1853.6677,-461.1673 1985.8435,-441.3557 2068.9352,-428.9012"/>
+<polygon fill="#191970" stroke="#191970" points="1770.9402,-470.0279 1761.5695,-474.9717 1771.9779,-476.9506 1770.9402,-470.0279"/>
 </g>
 <!-- Node165&#45;&gt;Node153 -->
-<g id="edge94" class="edge">
+<g id="edge93" class="edge">
 <title>Node165&#45;&gt;Node153</title>
-<path fill="none" stroke="#191970" d="M2079.2086,-465.7565C2073.7253,-455.2598 2066.9801,-442.3476 2061.8472,-432.5218"/>
-<polygon fill="#191970" stroke="#191970" points="2076.2529,-467.6576 2083.9854,-474.9005 2082.4573,-464.4165 2076.2529,-467.6576"/>
+<path fill="none" stroke="#191970" d="M1672.2177,-469.9839C1652.5558,-458.9138 1625.5611,-443.7151 1605.6804,-432.5218"/>
+<polygon fill="#191970" stroke="#191970" points="1670.5192,-473.0442 1680.9502,-474.9005 1673.9535,-466.9446 1670.5192,-473.0442"/>
 </g>
 <!-- Node168&#45;&gt;Node21 -->
-<g id="edge98" class="edge">
+<g id="edge97" class="edge">
 <title>Node168&#45;&gt;Node21</title>
-<path fill="none" stroke="#191970" d="M1083.6861,-729.0488C1044.0074,-717.373 985.529,-700.1652 949.4269,-689.5419"/>
-<polygon fill="#191970" stroke="#191970" points="1082.7624,-732.4253 1093.3438,-731.8906 1084.7385,-725.71 1082.7624,-732.4253"/>
+<path fill="none" stroke="#191970" d="M1223.5926,-728.6009C1258.4906,-716.9366 1309.0723,-700.0302 1340.4519,-689.5419"/>
+<polygon fill="#191970" stroke="#191970" points="1222.1247,-725.4011 1213.75,-731.8906 1224.3438,-732.04 1222.1247,-725.4011"/>
 </g>
 <!-- Node168&#45;&gt;Node22 -->
-<g id="edge99" class="edge">
+<g id="edge98" class="edge">
 <title>Node168&#45;&gt;Node22</title>
-<path fill="none" stroke="#191970" d="M1158.5444,-727.9307C1218.6825,-702.8561 1345.8246,-649.8443 1397.9594,-628.1068"/>
-<polygon fill="#191970" stroke="#191970" points="1157.0311,-724.7695 1149.1482,-731.8484 1159.725,-731.2304 1157.0311,-724.7695"/>
+<path fill="none" stroke="#191970" d="M1183.8749,-721.7305C1182.3508,-694.9497 1179.6894,-648.186 1178.5467,-628.1068"/>
+<polygon fill="#191970" stroke="#191970" points="1180.3881,-722.0634 1184.4507,-731.8484 1187.3768,-721.6657 1180.3881,-722.0634"/>
 </g>
 <!-- Node168&#45;&gt;Node111 -->
-<g id="edge100" class="edge">
+<g id="edge99" class="edge">
 <title>Node168&#45;&gt;Node111</title>
-<path fill="none" stroke="#191970" d="M1101.6735,-726.1122C1062.0288,-699.4073 984.9094,-640.2548 955,-567 949.792,-554.2445 950.7773,-549.1147 955,-536 959.3864,-522.3767 968.8592,-509.3091 977.1214,-499.7459"/>
-<polygon fill="#191970" stroke="#191970" points="1100.1682,-729.3123 1110.4422,-731.8999 1104.0243,-723.4701 1100.1682,-729.3123"/>
+<path fill="none" stroke="#191970" d="M1115.88,-736.8915C941.0809,-723.6552 488.8947,-679.041 399,-567 390.3778,-556.2536 391.1433,-547.3181 399,-536 409.062,-521.5052 448.3771,-507.6723 482.4829,-497.9997"/>
+<polygon fill="#191970" stroke="#191970" points="1115.7583,-740.392 1125.9909,-737.6454 1116.2789,-733.4114 1115.7583,-740.392"/>
 </g>
 <!-- Node159&#45;&gt;Node53 -->
-<g id="edge111" class="edge">
+<g id="edge110" class="edge">
 <title>Node159&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M1039.5753,-330.3561C1057.0083,-320.1995 1077.8031,-308.0843 1093.88,-298.7177"/>
-<polygon fill="#191970" stroke="#191970" points="1037.6868,-327.4056 1030.8082,-335.4639 1041.2107,-333.454 1037.6868,-327.4056"/>
+<path fill="none" stroke="#191970" d="M1748.4183,-328.7808C1736.3722,-318.9383 1722.4233,-307.541 1711.5334,-298.6432"/>
+<polygon fill="#191970" stroke="#191970" points="1746.4346,-331.6797 1756.3929,-335.2967 1750.8637,-326.2591 1746.4346,-331.6797"/>
 </g>
 <!-- Node214&#45;&gt;Node23 -->
-<g id="edge119" class="edge">
+<g id="edge118" class="edge">
 <title>Node214&#45;&gt;Node23</title>
-<path fill="none" stroke="#191970" d="M2204.8395,-407.7742C2190.9968,-405.8332 2176.5337,-403.8297 2163,-402 2039.536,-385.3079 2007.955,-386.1011 1885,-366 1753.5379,-344.5082 1600.1525,-312.3095 1521.1136,-295.1867"/>
-<polygon fill="#191970" stroke="#191970" points="2204.3974,-411.2464 2214.7874,-409.1732 2205.3723,-404.3146 2204.3974,-411.2464"/>
+<path fill="none" stroke="#191970" d="M1881.4638,-412.2331C1837.1373,-408.9563 1778.1809,-404.8403 1726,-402 1625.292,-396.5183 914.3445,-398.8874 819,-366 796.728,-358.3177 796.9226,-345.8309 776,-335 745.313,-319.1144 708.6642,-306.4272 679.2739,-297.6202"/>
+<polygon fill="#191970" stroke="#191970" points="1881.3138,-415.7315 1891.5458,-412.9826 1881.8328,-408.7508 1881.3138,-415.7315"/>
 </g>
 <!-- Node214&#45;&gt;Node27 -->
-<g id="edge132" class="edge">
+<g id="edge131" class="edge">
 <title>Node214&#45;&gt;Node27</title>
-<path fill="none" stroke="#191970" d="M2205.0299,-409.172C2181.3201,-406.5222 2154.5147,-403.8119 2130,-402 1726.7831,-372.1979 1621.8075,-419.1539 1221,-366 1216.4388,-365.3951 1211.7333,-364.6283 1207.0271,-363.7609"/>
-<polygon fill="#191970" stroke="#191970" points="2204.6552,-412.6518 2214.9859,-410.3011 2205.4441,-405.6964 2204.6552,-412.6518"/>
+<path fill="none" stroke="#191970" d="M1881.7303,-413.5764C1821.9502,-410.2212 1732.7377,-405.4085 1655,-402 1200.0448,-382.052 1085.7704,-389.7918 631,-366 573.254,-362.979 507.1844,-358.2238 462.0424,-354.7766"/>
+<polygon fill="#191970" stroke="#191970" points="1881.621,-417.0757 1891.802,-414.1436 1882.0146,-410.0868 1881.621,-417.0757"/>
 </g>
 <!-- Node214&#45;&gt;Node45 -->
-<g id="edge120" class="edge">
+<g id="edge119" class="edge">
 <title>Node214&#45;&gt;Node45</title>
-<path fill="none" stroke="#191970" d="M2205.0845,-405.9861C2150.7198,-396.4004 2072.6471,-381.806 2005,-366 1916.5124,-345.3246 1814.7624,-316.0591 1755.6841,-298.5121"/>
-<polygon fill="#191970" stroke="#191970" points="2204.4975,-409.4365 2214.9519,-407.7168 2205.7069,-402.5417 2204.4975,-409.4365"/>
+<path fill="none" stroke="#191970" d="M1881.7254,-413.6849C1821.9421,-410.3987 1732.7278,-405.6264 1655,-402 1262.6563,-383.6954 1159.9024,-427.6469 772,-366 680.4047,-351.4433 576.6926,-318.0723 520.5244,-298.5245"/>
+<polygon fill="#191970" stroke="#191970" points="1881.6202,-417.1843 1891.7976,-414.2398 1882.0053,-410.1949 1881.6202,-417.1843"/>
 </g>
 <!-- Node214&#45;&gt;Node46 -->
-<g id="edge124" class="edge">
+<g id="edge123" class="edge">
 <title>Node214&#45;&gt;Node46</title>
-<path fill="none" stroke="#191970" d="M2340.9989,-409.5292C2364.711,-406.9091 2391.5105,-404.1263 2416,-402 2477.1228,-396.6931 2925.0551,-410.7748 2967,-366 3062.3102,-264.2595 2829.3022,-139.937 2740.0426,-97.5105"/>
-<polygon fill="#191970" stroke="#191970" points="2340.5917,-406.0529 2331.0413,-410.6398 2341.3676,-413.0097 2340.5917,-406.0529"/>
+<path fill="none" stroke="#191970" d="M1940.7299,-393.013C1927.5112,-356.3255 1906.0944,-289.1397 1918,-268 1971.7305,-172.5958 2096.5206,-120.6317 2169.0563,-97.5143"/>
+<polygon fill="#191970" stroke="#191970" points="1937.4761,-394.3059 1944.2064,-402.4884 1944.0477,-391.8947 1937.4761,-394.3059"/>
 </g>
 <!-- Node214&#45;&gt;Node47 -->
-<g id="edge130" class="edge">
+<g id="edge129" class="edge">
 <title>Node214&#45;&gt;Node47</title>
-<path fill="none" stroke="#191970" d="M2340.9957,-409.493C2364.7076,-406.8698 2391.5078,-404.0943 2416,-402 2479.6707,-396.5555 2944.5467,-410.9177 2990,-366 3088.28,-268.8781 2942.3444,-84.5665 2895.5987,-30.6631"/>
-<polygon fill="#191970" stroke="#191970" points="2340.5879,-406.0167 2331.0383,-410.6054 2341.3651,-412.9734 2340.5879,-406.0167"/>
+<path fill="none" stroke="#191970" d="M2018.0721,-407.0207C2031.9218,-405.1387 2046.41,-403.3497 2060,-402 2197.3342,-388.3602 2547.546,-404.764 2680,-366 2751.9032,-344.9568 2827,-358.4192 2827,-283.5 2827,-283.5 2827,-283.5 2827,-149.5 2827,-91.7188 2657.8096,-49.3672 2555.3701,-29.0202"/>
+<polygon fill="#191970" stroke="#191970" points="2017.5457,-403.5601 2008.1222,-408.4025 2018.5087,-410.4936 2017.5457,-403.5601"/>
 </g>
 <!-- Node214&#45;&gt;Node48 -->
-<g id="edge131" class="edge">
+<g id="edge130" class="edge">
 <title>Node214&#45;&gt;Node48</title>
-<path fill="none" stroke="#191970" d="M2235.8334,-397.1618C2222.3193,-388.6569 2207.5449,-377.9595 2196,-366 2163.7642,-332.6067 2094.245,-208.3137 2070.3023,-164.7241"/>
-<polygon fill="#191970" stroke="#191970" points="2234.1232,-400.2181 2244.4855,-402.4253 2237.7613,-394.2378 2234.1232,-400.2181"/>
+<path fill="none" stroke="#191970" d="M1934.5801,-393.7498C1899.5041,-339.7246 1814.5567,-208.8861 1785.7455,-164.5103"/>
+<polygon fill="#191970" stroke="#191970" points="1931.7654,-395.8419 1940.1465,-402.3233 1937.6365,-392.03 1931.7654,-395.8419"/>
 </g>
 <!-- Node214&#45;&gt;Node49 -->
-<g id="edge122" class="edge">
+<g id="edge121" class="edge">
 <title>Node214&#45;&gt;Node49</title>
-<path fill="none" stroke="#191970" d="M2275.4407,-392.3415C2278.1114,-364.8131 2282.3065,-321.5714 2284.5194,-298.7614"/>
-<polygon fill="#191970" stroke="#191970" points="2271.9481,-392.0977 2274.466,-402.389 2278.9153,-392.7737 2271.9481,-392.0977"/>
+<path fill="none" stroke="#191970" d="M1959.6631,-393.0686C1970.5355,-365.58 1987.8621,-321.7732 1996.9638,-298.7614"/>
+<polygon fill="#191970" stroke="#191970" points="1956.4001,-391.8026 1955.9767,-402.389 1962.9095,-394.3772 1956.4001,-391.8026"/>
 </g>
 <!-- Node214&#45;&gt;Node50 -->
-<g id="edge128" class="edge">
+<g id="edge127" class="edge">
 <title>Node214&#45;&gt;Node50</title>
-<path fill="none" stroke="#191970" d="M2248.0978,-395.5217C2215.8473,-364.444 2167.5176,-307.0948 2201,-268 2217.3851,-248.8684 2363.1911,-232.0446 2455.7404,-223.1673"/>
-<polygon fill="#191970" stroke="#191970" points="2245.808,-398.1728 2255.4938,-402.4666 2250.5997,-393.0698 2245.808,-398.1728"/>
+<path fill="none" stroke="#191970" d="M1977.7205,-396.1366C2006.5345,-373.3846 2052.3234,-335.6689 2088,-299 2109.3589,-277.047 2131.1896,-248.8754 2143.8993,-231.7806"/>
+<polygon fill="#191970" stroke="#191970" points="1975.417,-393.4952 1969.7098,-402.4215 1979.7378,-399.0025 1975.417,-393.4952"/>
 </g>
 <!-- Node214&#45;&gt;Node52 -->
-<g id="edge125" class="edge">
+<g id="edge124" class="edge">
 <title>Node214&#45;&gt;Node52</title>
-<path fill="none" stroke="#191970" d="M2206.4163,-399.7371C2172.5528,-390.3514 2130.8659,-378.2564 2094,-366 2029.6797,-344.6161 1956.2018,-315.8964 1913.0856,-298.5951"/>
-<polygon fill="#191970" stroke="#191970" points="2205.6332,-403.1517 2216.2037,-402.435 2207.4933,-396.4034 2205.6332,-403.1517"/>
+<path fill="none" stroke="#191970" d="M1881.5284,-410.303C1812.256,-402.2883 1702.3606,-387.5577 1609,-366 1529.2685,-347.5894 1438.7074,-316.9642 1387.3591,-298.6568"/>
+<polygon fill="#191970" stroke="#191970" points="1881.5351,-413.8264 1891.8678,-411.4842 1882.3297,-406.8716 1881.5351,-413.8264"/>
 </g>
 <!-- Node214&#45;&gt;Node53 -->
-<g id="edge129" class="edge">
+<g id="edge128" class="edge">
 <title>Node214&#45;&gt;Node53</title>
-<path fill="none" stroke="#191970" d="M2205.0283,-409.1927C2181.3185,-406.5446 2154.5134,-403.8302 2130,-402 2032.8549,-394.7472 1345.0784,-400.5615 1254,-366 1233.622,-358.2671 1234.1312,-347.0963 1216,-335 1195.3893,-321.2494 1170.7341,-308.0861 1151.5709,-298.5097"/>
-<polygon fill="#191970" stroke="#191970" points="2204.654,-412.6726 2214.9845,-410.3208 2205.4421,-405.7171 2204.654,-412.6726"/>
+<path fill="none" stroke="#191970" d="M1930.3201,-394.5265C1913.3777,-376.0636 1887.3916,-350.703 1860,-335 1831.1776,-318.4768 1796.4581,-306.6664 1766.1595,-298.5602"/>
+<polygon fill="#191970" stroke="#191970" points="1927.9382,-397.1126 1937.2227,-402.2163 1933.1474,-392.4366 1927.9382,-397.1126"/>
 </g>
 <!-- Node214&#45;&gt;Node57 -->
-<g id="edge133" class="edge">
+<g id="edge132" class="edge">
 <title>Node214&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M2205.161,-401.2417C2180.1008,-393.2039 2152.3791,-381.7602 2130,-366 2104.0497,-347.7248 2082.427,-317.0541 2070.929,-298.7016"/>
-<polygon fill="#191970" stroke="#191970" points="2204.3942,-404.6676 2214.9814,-404.2646 2206.4537,-397.9774 2204.3942,-404.6676"/>
+<path fill="none" stroke="#191970" d="M1881.8908,-407.8874C1828.8911,-399.482 1753.8663,-385.5448 1690,-366 1658.3154,-356.3036 1574.7017,-319.6445 1527.5588,-298.6007"/>
+<polygon fill="#191970" stroke="#191970" points="1881.4344,-411.3585 1891.8548,-409.4437 1882.5147,-404.4424 1881.4344,-411.3585"/>
 </g>
-<!-- Node214&#45;&gt;Node147 -->
-<g id="edge121" class="edge">
-<title>Node214&#45;&gt;Node147</title>
-<path fill="none" stroke="#191970" d="M2341.4235,-409.6255C2456.1952,-396.3178 2675.7635,-370.3928 2683,-366 2708.8942,-350.2813 2725.921,-317.8173 2734.1721,-298.6327"/>
-<polygon fill="#191970" stroke="#191970" points="2340.8576,-406.1675 2331.3268,-410.795 2341.6631,-413.121 2340.8576,-406.1675"/>
+<!-- Node214&#45;&gt;Node146 -->
+<g id="edge120" class="edge">
+<title>Node214&#45;&gt;Node146</title>
+<path fill="none" stroke="#191970" d="M2018.1632,-407.794C2032.0057,-405.8514 2046.468,-403.8423 2060,-402 2181.7017,-385.4309 2223.0002,-418.5821 2334,-366 2352.1858,-357.3851 2351.4486,-347.7707 2367,-335 2382.9152,-321.9305 2401.8962,-308.4328 2416.373,-298.5361"/>
+<polygon fill="#191970" stroke="#191970" points="2017.6302,-404.3344 2008.2153,-409.1934 2018.6054,-411.2662 2017.6302,-404.3344"/>
+</g>
+<!-- Node214&#45;&gt;Node148 -->
+<g id="edge122" class="edge">
+<title>Node214&#45;&gt;Node148</title>
+<path fill="none" stroke="#191970" d="M2018.0746,-407.0455C2031.9241,-405.1616 2046.4116,-403.3656 2060,-402 2126.0104,-395.3663 2603.0473,-405.9674 2656,-366 2677.085,-350.0856 2682.9048,-317.6919 2684.4699,-298.5725"/>
+<polygon fill="#191970" stroke="#191970" points="2017.5479,-403.585 2008.1248,-408.428 2018.5113,-410.5184 2017.5479,-403.585"/>
 </g>
 <!-- Node214&#45;&gt;Node149 -->
-<g id="edge123" class="edge">
+<g id="edge125" class="edge">
 <title>Node214&#45;&gt;Node149</title>
-<path fill="none" stroke="#191970" d="M2306.6713,-397.0839C2351.409,-369.9579 2429.4128,-322.6615 2469.1692,-298.5558"/>
-<polygon fill="#191970" stroke="#191970" points="2304.6582,-394.2114 2297.9219,-402.389 2308.2875,-400.197 2304.6582,-394.2114"/>
-</g>
-<!-- Node214&#45;&gt;Node150 -->
-<g id="edge126" class="edge">
-<title>Node214&#45;&gt;Node150</title>
-<path fill="none" stroke="#191970" d="M2341.167,-410.0005C2455.7192,-397.3979 2686.0226,-372.0607 2805.7676,-358.8868"/>
-<polygon fill="#191970" stroke="#191970" points="2340.6749,-406.5334 2331.1176,-411.1061 2341.4404,-413.4915 2340.6749,-406.5334"/>
+<path fill="none" stroke="#191970" d="M2017.9061,-402.2836C2067.0344,-391.2749 2133.1097,-376.4687 2182.0226,-365.5083"/>
+<polygon fill="#191970" stroke="#191970" points="2016.9974,-398.9003 2008.0047,-404.5023 2018.5281,-405.7309 2016.9974,-398.9003"/>
 </g>
 <!-- Node214&#45;&gt;Node159 -->
-<g id="edge127" class="edge">
+<g id="edge126" class="edge">
 <title>Node214&#45;&gt;Node159</title>
-<path fill="none" stroke="#191970" d="M2205.0349,-409.1026C2181.3256,-406.447 2154.5192,-403.7508 2130,-402 1668.6756,-369.0583 1549.8299,-415.6167 1090,-366 1087.1664,-365.6942 1084.2852,-365.3485 1081.379,-364.9704"/>
-<polygon fill="#191970" stroke="#191970" points="2204.6591,-412.5823 2214.9907,-410.2353 2205.4505,-405.6272 2204.6591,-412.5823"/>
+<path fill="none" stroke="#191970" d="M1901.3265,-398.865C1873.9333,-388.3773 1840.1717,-375.4515 1814.4447,-365.6017"/>
+<polygon fill="#191970" stroke="#191970" points="1900.1362,-402.157 1910.7266,-402.4639 1902.6391,-395.6197 1900.1362,-402.157"/>
 </g>
 <!-- Node193&#45;&gt;Node165 -->
-<g id="edge139" class="edge">
+<g id="edge138" class="edge">
 <title>Node193&#45;&gt;Node165</title>
-<path fill="none" stroke="#191970" d="M2042.8473,-600.138C2031.5802,-583.1362 2019.1952,-557.3372 2029,-536 2037.5979,-517.2895 2056.7195,-502.8242 2071.0805,-494.0531"/>
-<polygon fill="#191970" stroke="#191970" points="2040.2042,-602.4616 2048.8437,-608.5942 2045.9143,-598.4124 2040.2042,-602.4616"/>
+<path fill="none" stroke="#191970" d="M1709.9234,-598.624C1706.8657,-569.3572 1701.2692,-515.7914 1698.9977,-494.0496"/>
+<polygon fill="#191970" stroke="#191970" points="1706.4729,-599.2809 1710.9932,-608.8631 1713.435,-598.5535 1706.4729,-599.2809"/>
 </g>
 <!-- Node193&#45;&gt;Node194 -->
-<g id="edge135" class="edge">
+<g id="edge134" class="edge">
 <title>Node193&#45;&gt;Node194</title>
-<path fill="none" stroke="#191970" d="M2068.2536,-600.1099C2075.9827,-587.4795 2085.9301,-571.2241 2092.1366,-561.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2065.1086,-598.544 2062.8743,-608.9005 2071.0794,-602.1977 2065.1086,-598.544"/>
+<path fill="none" stroke="#191970" d="M1690.7304,-603.0102C1672.9577,-590.067 1648.1761,-572.0195 1633.1569,-561.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1688.6746,-605.8428 1698.8186,-608.9005 1692.7955,-600.1843 1688.6746,-605.8428"/>
 </g>
 <!-- Node194&#45;&gt;Node57 -->
-<g id="edge136" class="edge">
+<g id="edge135" class="edge">
 <title>Node194&#45;&gt;Node57</title>
-<path fill="none" stroke="#191970" d="M2129.3301,-537.0348C2143.9165,-528.4978 2159.8235,-516.1666 2168,-500 2174.2183,-487.7053 2170.0844,-482.6192 2168,-469 2163.279,-438.153 2165.6095,-427.0502 2147,-402 2130.0188,-379.1417 2111.7293,-388.2831 2094,-366 2077.8353,-345.6833 2069.0672,-316.237 2064.9746,-298.5691"/>
-<polygon fill="#191970" stroke="#191970" points="2127.5964,-533.9936 2120.4995,-541.8603 2130.9532,-540.1363 2127.5964,-533.9936"/>
+<path fill="none" stroke="#191970" d="M1615.5472,-531.8714C1612.5975,-514.6206 1610.6364,-489.1916 1619,-469 1627.6591,-448.095 1646.3409,-453.905 1655,-433 1660.2725,-420.271 1663.5301,-412.8196 1655,-402 1612.553,-348.1604 1550.9194,-416.9102 1505,-366 1488.4507,-347.652 1489.1022,-317.0057 1491.3593,-298.6775"/>
+<polygon fill="#191970" stroke="#191970" points="1612.1549,-532.7591 1617.5297,-541.8894 1619.0217,-531.4001 1612.1549,-532.7591"/>
 </g>
 <!-- Node194&#45;&gt;Node95 -->
-<g id="edge137" class="edge">
+<g id="edge136" class="edge">
 <title>Node194&#45;&gt;Node95</title>
-<path fill="none" stroke="#191970" d="M2071.9474,-536.7698C2023.037,-509.1158 1918.7257,-450.1382 1877.89,-427.0496"/>
-<polygon fill="#191970" stroke="#191970" points="2070.5279,-539.988 2080.9555,-541.8631 2073.9732,-533.8945 2070.5279,-539.988"/>
+<path fill="none" stroke="#191970" d="M1594.2771,-536.7698C1545.9859,-509.1158 1442.995,-450.1382 1402.6762,-427.0496"/>
+<polygon fill="#191970" stroke="#191970" points="1592.7541,-539.9309 1603.1713,-541.8631 1596.2327,-533.8564 1592.7541,-539.9309"/>
 </g>
 <!-- Node194&#45;&gt;Node165 -->
-<g id="edge138" class="edge">
+<g id="edge137" class="edge">
 <title>Node194&#45;&gt;Node165</title>
-<path fill="none" stroke="#191970" d="M2095.3788,-531.9863C2093.7053,-519.5286 2091.6094,-503.9258 2090.2871,-494.0817"/>
-<polygon fill="#191970" stroke="#191970" points="2091.9103,-532.4555 2096.7105,-541.9005 2098.848,-531.5236 2091.9103,-532.4555"/>
+<path fill="none" stroke="#191970" d="M1638.8606,-535.2992C1653.8624,-522.4131 1674.3431,-504.8207 1686.8452,-494.0817"/>
+<polygon fill="#191970" stroke="#191970" points="1636.4806,-532.7296 1631.1755,-541.9005 1641.0418,-538.0396 1636.4806,-532.7296"/>
 </g>
 </g>
 </svg>
diff --git a/docs/reference/api/doxygen/auto__schedule_8h__incl.svg b/docs/reference/api/doxygen/auto__schedule_8h__incl.svg
index 7b2c1ed819..250ae6ea51 100644
--- a/docs/reference/api/doxygen/auto__schedule_8h__incl.svg
+++ b/docs/reference/api/doxygen/auto__schedule_8h__incl.svg
@@ -44,9 +44,9 @@
 <path fill="none" stroke="#191970" d="M433.9234,-1241.296C291.7515,-1231.3455 0,-1200.1288 0,-1111.5 0,-1111.5 0,-1111.5 0,-189 0,-150.2415 22.3881,-140.4423 57,-123 115.5717,-93.4834 579.8935,-76.629 708.233,-72.5029"/>
 <polygon fill="#191970" stroke="#191970" points="708.3599,-76.0007 718.2438,-72.1851 708.1377,-69.0042 708.3599,-76.0007"/>
 </g>
-<!-- Node90 -->
+<!-- Node89 -->
 <g id="node50" class="node">
-<title>Node90</title>
+<title>Node89</title>
 <g id="a_node50"><a xlink:href="search__policy_8h.html" target="_top" xlink:title="The base class of search policies, including the abstract definition of search policy and other suppo...">
 <polygon fill="#ffffff" stroke="#000000" points="1690.5,-1163.5 1690.5,-1193.5 1803.5,-1193.5 1803.5,-1163.5 1690.5,-1163.5"/>
 <text text-anchor="start" x="1698.5" y="-1181.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/auto_scheduler</text>
@@ -54,9 +54,9 @@
 </a>
 </g>
 </g>
-<!-- Node0&#45;&gt;Node90 -->
+<!-- Node0&#45;&gt;Node89 -->
 <g id="edge180" class="edge">
-<title>Node0&#45;&gt;Node90</title>
+<title>Node0&#45;&gt;Node89</title>
 <path fill="none" stroke="#191970" d="M586.4464,-1241.3594C811.3705,-1229.1768 1468.3478,-1193.5927 1680.2691,-1182.1144"/>
 <polygon fill="#191970" stroke="#191970" points="1680.5231,-1185.6058 1690.3192,-1181.57 1680.1445,-1178.6161 1680.5231,-1185.6058"/>
 </g>
@@ -1509,45 +1509,45 @@
 <path fill="none" stroke="#191970" d="M991.507,-548.4581C976.5938,-539.6003 954.2445,-525.7461 936,-512 787.3682,-400.0148 779.562,-334.0268 622,-235 600.1331,-221.2567 573.4502,-209.9165 551.958,-201.9342"/>
 <polygon fill="#191970" stroke="#191970" points="553.0786,-198.6177 542.4844,-198.5064 550.6969,-205.2001 553.0786,-198.6177"/>
 </g>
-<!-- Node90&#45;&gt;Node1 -->
+<!-- Node89&#45;&gt;Node1 -->
 <g id="edge181" class="edge">
-<title>Node90&#45;&gt;Node1</title>
+<title>Node89&#45;&gt;Node1</title>
 <path fill="none" stroke="#191970" d="M1690.2267,-1175.425C1486.834,-1164.4085 795.1167,-1126.9429 576.6765,-1115.1114"/>
 <polygon fill="#191970" stroke="#191970" points="576.8109,-1111.6137 566.6362,-1114.5676 576.4322,-1118.6034 576.8109,-1111.6137"/>
 </g>
-<!-- Node90&#45;&gt;Node6 -->
+<!-- Node89&#45;&gt;Node6 -->
 <g id="edge183" class="edge">
-<title>Node90&#45;&gt;Node6</title>
+<title>Node89&#45;&gt;Node6</title>
 <path fill="none" stroke="#191970" d="M1803.6977,-1175.1316C1876.5316,-1169.6613 1997.4583,-1156.3488 2030,-1127 2058.3574,-1101.425 2054,-1082.6867 2054,-1044.5 2054,-1044.5 2054,-1044.5 2054,-614 2054,-567.0456 2004.2031,-533.5403 1969.8629,-516.0171"/>
 <polygon fill="#191970" stroke="#191970" points="1971.1616,-512.756 1960.6413,-511.5006 1968.0826,-519.0425 1971.1616,-512.756"/>
 </g>
-<!-- Node90&#45;&gt;Node17 -->
+<!-- Node89&#45;&gt;Node17 -->
 <g id="edge184" class="edge">
-<title>Node90&#45;&gt;Node17</title>
+<title>Node89&#45;&gt;Node17</title>
 <path fill="none" stroke="#191970" d="M1803.801,-1172.8842C1840.1513,-1166.7428 1886.4295,-1153.8904 1919,-1127 2018.2459,-1045.0619 1978,-972.1997 1978,-843.5 1978,-843.5 1978,-843.5 1978,-670 1978,-470.5078 1584.7828,-447.7464 1721,-302 1801.9091,-215.4309 2172.0909,-352.5691 2253,-266 2262.4077,-255.9341 2261.2271,-246.0518 2253,-235 2185.1867,-143.9035 1815.6804,-89.8556 1703.9907,-75.4261"/>
 <polygon fill="#191970" stroke="#191970" points="1704.368,-71.946 1694.0057,-74.1535 1703.4829,-78.8898 1704.368,-71.946"/>
 </g>
-<!-- Node90&#45;&gt;Node19 -->
+<!-- Node89&#45;&gt;Node19 -->
 <g id="edge186" class="edge">
-<title>Node90&#45;&gt;Node19</title>
+<title>Node89&#45;&gt;Node19</title>
 <path fill="none" stroke="#191970" d="M1690.1425,-1177.3903C1472.3162,-1172.9622 690.8357,-1155.4085 444,-1127 261.0754,-1105.9471 38,-1228.6321 38,-1044.5 38,-1044.5 38,-1044.5 38,-189 38,-150.2415 60.4265,-140.5185 95,-123 149.9814,-95.1407 584.183,-77.2406 708.0922,-72.6631"/>
 <polygon fill="#191970" stroke="#191970" points="708.5788,-76.1477 718.4444,-72.2851 708.3234,-69.1524 708.5788,-76.1477"/>
 </g>
-<!-- Node90&#45;&gt;Node21 -->
+<!-- Node89&#45;&gt;Node21 -->
 <g id="edge187" class="edge">
-<title>Node90&#45;&gt;Node21</title>
+<title>Node89&#45;&gt;Node21</title>
 <path fill="none" stroke="#191970" d="M1754.9052,-1163.2863C1766.8193,-1138.9046 1788,-1089.2644 1788,-1044.5 1788,-1044.5 1788,-1044.5 1788,-843.5 1788,-526.6544 1510.2075,-550.1356 1370,-266 1360.8204,-247.3972 1354.5452,-224.4285 1350.8972,-208.441"/>
 <polygon fill="#191970" stroke="#191970" points="1354.3112,-207.6687 1348.788,-198.6275 1347.4675,-209.1397 1354.3112,-207.6687"/>
 </g>
-<!-- Node90&#45;&gt;Node83 -->
+<!-- Node89&#45;&gt;Node83 -->
 <g id="edge182" class="edge">
-<title>Node90&#45;&gt;Node83</title>
+<title>Node89&#45;&gt;Node83</title>
 <path fill="none" stroke="#191970" d="M1737.753,-1163.389C1723.6037,-1140.267 1696.4545,-1095.9012 1679.6193,-1068.3901"/>
 <polygon fill="#191970" stroke="#191970" points="1682.5441,-1066.4642 1674.3391,-1059.7614 1676.5734,-1070.118 1682.5441,-1066.4642"/>
 </g>
-<!-- Node90&#45;&gt;Node77 -->
+<!-- Node89&#45;&gt;Node77 -->
 <g id="edge185" class="edge">
-<title>Node90&#45;&gt;Node77</title>
+<title>Node89&#45;&gt;Node77</title>
 <path fill="none" stroke="#191970" d="M1803.6023,-1165.0192C1860.5511,-1147.6865 1940,-1111.4996 1940,-1044.5 1940,-1044.5 1940,-1044.5 1940,-670 1940,-629.4297 1904.6719,-593.5972 1880.6782,-573.9394"/>
 <polygon fill="#191970" stroke="#191970" points="1882.76,-571.1237 1872.7405,-567.6795 1878.4253,-576.6202 1882.76,-571.1237"/>
 </g>
diff --git a/docs/reference/api/doxygen/auto__scheduler_2feature_8h__incl.svg b/docs/reference/api/doxygen/auto__scheduler_2feature_8h__incl.svg
index d83b34354e..f53220e815 100644
--- a/docs/reference/api/doxygen/auto__scheduler_2feature_8h__incl.svg
+++ b/docs/reference/api/doxygen/auto__scheduler_2feature_8h__incl.svg
@@ -72,18 +72,18 @@
 <path fill="none" stroke="#191970" d="M2568,-1096.2967C2568,-1088.5013 2568,-1078.7991 2568,-1069.9064"/>
 <polygon fill="#191970" stroke="#191970" points="2571.5001,-1069.6431 2568,-1059.6432 2564.5001,-1069.6432 2571.5001,-1069.6431"/>
 </g>
-<!-- Node90 -->
+<!-- Node89 -->
 <g id="node48" class="node">
-<title>Node90</title>
+<title>Node89</title>
 <g id="a_node48"><a xlink:href="tir_2function_8h.html" target="_top" xlink:title="TIR Function. ">
 <polygon fill="#ffffff" stroke="#000000" points="1011.5,-537.5 1011.5,-556.5 1112.5,-556.5 1112.5,-537.5 1011.5,-537.5"/>
 <text text-anchor="middle" x="1062" y="-544.5" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/tir/function.h</text>
 </a>
 </g>
 </g>
-<!-- Node0&#45;&gt;Node90 -->
+<!-- Node0&#45;&gt;Node89 -->
 <g id="edge161" class="edge">
-<title>Node0&#45;&gt;Node90</title>
+<title>Node0&#45;&gt;Node89</title>
 <path fill="none" stroke="#191970" d="M2491.6829,-1110.1944C2100.7939,-1103.1921 332.2921,-1067.5912 100,-993 87.9102,-989.1178 76,-990.1978 76,-977.5 76,-977.5 76,-977.5 76,-659 76,-565.4013 786.5265,-550.0038 1001.2611,-547.4876"/>
 <polygon fill="#191970" stroke="#191970" points="1001.4469,-550.9859 1011.4071,-547.3741 1001.3685,-543.9863 1001.4469,-550.9859"/>
 </g>
@@ -1380,33 +1380,33 @@
 <path fill="none" stroke="#191970" d="M1880.1612,-374.6069C1905.924,-372.6713 1937.0176,-370.5009 1965,-369 2441.4379,-343.4448 3018.2638,-328.4468 3193.8049,-324.2331"/>
 <polygon fill="#191970" stroke="#191970" points="3193.9001,-327.7319 3203.8137,-323.9941 3193.733,-320.7339 3193.9001,-327.7319"/>
 </g>
-<!-- Node90&#45;&gt;Node17 -->
+<!-- Node89&#45;&gt;Node17 -->
 <g id="edge182" class="edge">
-<title>Node90&#45;&gt;Node17</title>
+<title>Node89&#45;&gt;Node17</title>
 <path fill="none" stroke="#191970" d="M1011.2899,-538.6215C906.534,-519.114 662.9389,-462.1408 506,-333 461.0329,-295.9978 448.836,-278.2171 438,-221 435.4363,-207.4628 437.487,-203.7682 438,-190 439.1119,-160.1601 430.6198,-150.1732 443,-123 449.4023,-108.9478 461.386,-96.5335 472.1288,-87.4159"/>
 <polygon fill="#191970" stroke="#191970" points="474.3765,-90.0999 479.989,-81.1138 469.9977,-84.6386 474.3765,-90.0999"/>
 </g>
-<!-- Node90&#45;&gt;Node60 -->
+<!-- Node89&#45;&gt;Node60 -->
 <g id="edge180" class="edge">
-<title>Node90&#45;&gt;Node60</title>
+<title>Node89&#45;&gt;Node60</title>
 <path fill="none" stroke="#191970" d="M1112.7337,-538.2582C1233.6521,-517.423 1537.8309,-465.0107 1660.0089,-443.9585"/>
 <polygon fill="#191970" stroke="#191970" points="1660.9585,-447.3465 1670.2189,-442.1992 1659.7698,-440.4482 1660.9585,-447.3465"/>
 </g>
-<!-- Node90&#45;&gt;Node61 -->
+<!-- Node89&#45;&gt;Node61 -->
 <g id="edge179" class="edge">
-<title>Node90&#45;&gt;Node61</title>
+<title>Node89&#45;&gt;Node61</title>
 <path fill="none" stroke="#191970" d="M1064.2232,-537.2765C1070.0877,-513.9461 1088.2611,-454.5427 1127,-425 1150.0727,-407.4045 1223.8646,-393.8483 1276.0435,-386.1469"/>
 <polygon fill="#191970" stroke="#191970" points="1276.7723,-389.5781 1286.1696,-384.6853 1275.7722,-382.6499 1276.7723,-389.5781"/>
 </g>
-<!-- Node90&#45;&gt;Node63 -->
+<!-- Node89&#45;&gt;Node63 -->
 <g id="edge181" class="edge">
-<title>Node90&#45;&gt;Node63</title>
+<title>Node89&#45;&gt;Node63</title>
 <path fill="none" stroke="#191970" d="M1112.8066,-541.5285C1211.7825,-530.8696 1429.4896,-507.4242 1529.6701,-496.6355"/>
 <polygon fill="#191970" stroke="#191970" points="1530.2863,-500.0895 1539.8541,-495.5388 1529.5368,-493.1297 1530.2863,-500.0895"/>
 </g>
-<!-- Node90&#45;&gt;Node38 -->
+<!-- Node89&#45;&gt;Node38 -->
 <g id="edge178" class="edge">
-<title>Node90&#45;&gt;Node38</title>
+<title>Node89&#45;&gt;Node38</title>
 <path fill="none" stroke="#191970" d="M1112.59,-544.4532C1337.5871,-532.9346 2241.6266,-484.5576 2362,-445 2473.4689,-408.3687 2585.8766,-319.83 2629.2906,-283.2406"/>
 <polygon fill="#191970" stroke="#191970" points="2631.6894,-285.7949 2637.0327,-276.6462 2627.1504,-280.466 2631.6894,-285.7949"/>
 </g>
@@ -1419,9 +1419,9 @@
 </a>
 </g>
 </g>
-<!-- Node90&#45;&gt;Node73 -->
+<!-- Node89&#45;&gt;Node73 -->
 <g id="edge162" class="edge">
-<title>Node90&#45;&gt;Node73</title>
+<title>Node89&#45;&gt;Node73</title>
 <path fill="none" stroke="#191970" d="M1057.1558,-537.2208C1047.0295,-515.2509 1026.3598,-461.0869 1049,-425 1058.5946,-409.7069 1074.9867,-399.2983 1091.2377,-392.3094"/>
 <polygon fill="#191970" stroke="#191970" points="1092.7088,-395.492 1100.7373,-388.5786 1090.1499,-388.9764 1092.7088,-395.492"/>
 </g>
diff --git a/docs/reference/api/doxygen/bias__add_8h__incl.svg b/docs/reference/api/doxygen/bias__add_8h__incl.svg
index 640a135645..78f0eec9a5 100644
--- a/docs/reference/api/doxygen/bias__add_8h__incl.svg
+++ b/docs/reference/api/doxygen/bias__add_8h__incl.svg
@@ -58,33 +58,33 @@
 <path fill="none" stroke="#191970" d="M1391.2008,-1063.1389C1391.2008,-1041.9692 1391.2008,-1003.8174 1391.2008,-980.6112"/>
 <polygon fill="#191970" stroke="#191970" points="1394.7009,-980.5594 1391.2008,-970.5595 1387.7009,-980.5595 1394.7009,-980.5594"/>
 </g>
-<!-- Node92 -->
+<!-- Node91 -->
 <g id="node41" class="node">
-<title>Node92</title>
+<title>Node91</title>
 <g id="a_node41"><a xlink:href="tags_8h.html" target="_top" xlink:title="External function interface to rocBLAS libraries. ">
 <polygon fill="#ffffff" stroke="#000000" points="3034.7008,-890 3034.7008,-909 3125.7008,-909 3125.7008,-890 3034.7008,-890"/>
 <text text-anchor="middle" x="3080.2008" y="-897" font-family="Helvetica,sans-Serif" font-size="10.00" fill="#000000">tvm/topi/tags.h</text>
 </a>
 </g>
 </g>
-<!-- Node0&#45;&gt;Node92 -->
... 105630 lines suppressed ...